mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 08:45:26 -05:00
perf tools: Merge deferred user callchains
Save samples with deferred callchains in a separate list and deliver
them after merging the user callchains. If users don't want to merge
they can set tool->merge_deferred_callchains to false to prevent the
behavior.
Using the previous result, perf script now shows the merged callchains.
$ perf script
...
pwd 2312 121.163435: 249113 cpu/cycles/P:
ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
...
The old output can be obtained with the --no-merge-callchains option.
perf report also shows the user callchain entries at the end.
$ perf report --no-children --stdio -q -S __build_id_parse.isra.0
# symbol: __build_id_parse.isra.0
8.40% pwd [kernel.kallsyms]
|
---__build_id_parse.isra.0
perf_event_mmap
mprotect_fixup
do_mprotect_pkey
__x64_sys_mprotect
do_syscall_64
entry_SYSCALL_64_after_hwframe
mprotect
_dl_sysdep_start
_dl_start_user
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
@@ -527,6 +527,11 @@ include::itrace.txt[]
|
||||
The known limitations include exception handling such as
|
||||
setjmp/longjmp will have calls/returns not match.
|
||||
|
||||
--merge-callchains::
|
||||
Enable merging deferred user callchains if available. This is the
|
||||
default behavior. If you want to see separate CALLCHAIN_DEFERRED
|
||||
records for some reason, use --no-merge-callchains explicitly.
|
||||
|
||||
:GMEXAMPLECMD: script
|
||||
:GMEXAMPLESUBCMD:
|
||||
include::guest-files.txt[]
|
||||
|
||||
@@ -2527,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
|
||||
inject.tool.auxtrace = perf_event__repipe_auxtrace;
|
||||
inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
|
||||
inject.tool.dont_split_sample_group = true;
|
||||
inject.tool.merge_deferred_callchains = false;
|
||||
inject.session = __perf_session__new(&data, &inject.tool,
|
||||
/*trace_event_repipe=*/inject.output.is_pipe,
|
||||
/*host_env=*/NULL);
|
||||
|
||||
@@ -1614,6 +1614,7 @@ int cmd_report(int argc, const char **argv)
|
||||
report.tool.event_update = perf_event__process_event_update;
|
||||
report.tool.feature = process_feature_event;
|
||||
report.tool.ordering_requires_timestamps = true;
|
||||
report.tool.merge_deferred_callchains = !dump_trace;
|
||||
|
||||
session = perf_session__new(&data, &report.tool);
|
||||
if (IS_ERR(session)) {
|
||||
|
||||
@@ -4009,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
|
||||
bool header_only = false;
|
||||
bool script_started = false;
|
||||
bool unsorted_dump = false;
|
||||
bool merge_deferred_callchains = true;
|
||||
char *rec_script_path = NULL;
|
||||
char *rep_script_path = NULL;
|
||||
struct perf_session *session;
|
||||
@@ -4162,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
|
||||
"Guest code can be found in hypervisor process"),
|
||||
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
|
||||
"Enable LBR callgraph stitching approach"),
|
||||
OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
|
||||
"Enable merge deferred user callchains"),
|
||||
OPTS_EVSWITCH(&script.evswitch),
|
||||
OPT_END()
|
||||
};
|
||||
@@ -4418,6 +4421,7 @@ int cmd_script(int argc, const char **argv)
|
||||
script.tool.throttle = process_throttle_event;
|
||||
script.tool.unthrottle = process_throttle_event;
|
||||
script.tool.ordering_requires_timestamps = true;
|
||||
script.tool.merge_deferred_callchains = merge_deferred_callchains;
|
||||
session = perf_session__new(&data, &script.tool);
|
||||
if (IS_ERR(session))
|
||||
return PTR_ERR(session);
|
||||
|
||||
@@ -1838,3 +1838,38 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function merges earlier samples (@sample_orig) waiting for deferred
|
||||
* user callchains with the matching callchain record (@sample_callchain)
|
||||
* which is delivered now. The @sample_orig->callchain should be released
|
||||
* after use if ->deferred_callchain is set.
|
||||
*/
|
||||
int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
|
||||
struct perf_sample *sample_callchain)
|
||||
{
|
||||
u64 nr_orig = sample_orig->callchain->nr - 1;
|
||||
u64 nr_deferred = sample_callchain->callchain->nr;
|
||||
struct ip_callchain *callchain;
|
||||
|
||||
if (sample_orig->callchain->nr < 2) {
|
||||
sample_orig->deferred_callchain = false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
|
||||
if (callchain == NULL) {
|
||||
sample_orig->deferred_callchain = false;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
callchain->nr = nr_orig + nr_deferred;
|
||||
/* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
|
||||
memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
|
||||
/* copy deferred user callchains */
|
||||
memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
|
||||
nr_deferred * sizeof(u64));
|
||||
|
||||
sample_orig->callchain = callchain;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -318,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
|
||||
struct perf_sample *sample, int max_stack,
|
||||
bool symbols, callchain_iter_fn cb, void *data);
|
||||
|
||||
int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
|
||||
struct perf_sample *sample_callchain);
|
||||
|
||||
#endif /* __PERF_CALLCHAIN_H */
|
||||
|
||||
@@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
|
||||
evlist->ctl_fd.pos = -1;
|
||||
evlist->nr_br_cntr = -1;
|
||||
metricgroup__rblist_init(&evlist->metric_events);
|
||||
INIT_LIST_HEAD(&evlist->deferred_samples);
|
||||
}
|
||||
|
||||
struct evlist *evlist__new(void)
|
||||
|
||||
@@ -92,6 +92,8 @@ struct evlist {
|
||||
* of struct metric_expr.
|
||||
*/
|
||||
struct rblist metric_events;
|
||||
/* samples with deferred_callchain would wait here. */
|
||||
struct list_head deferred_samples;
|
||||
};
|
||||
|
||||
struct evsel_str_handler {
|
||||
|
||||
@@ -1285,6 +1285,66 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
|
||||
per_thread);
|
||||
}
|
||||
|
||||
/*
|
||||
* Samples with deferred callchains should wait for the next matching
|
||||
* PERF_RECORD_CALLCHAIN_RECORD entries. Keep the events in a list and
|
||||
* deliver them once it finds the callchains.
|
||||
*/
|
||||
struct deferred_event {
|
||||
struct list_head list;
|
||||
union perf_event *event;
|
||||
};
|
||||
|
||||
static int evlist__deliver_deferred_callchain(struct evlist *evlist,
|
||||
const struct perf_tool *tool,
|
||||
union perf_event *event,
|
||||
struct perf_sample *sample,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct deferred_event *de, *tmp;
|
||||
struct evsel *evsel;
|
||||
int ret = 0;
|
||||
|
||||
if (!tool->merge_deferred_callchains) {
|
||||
evsel = evlist__id2evsel(evlist, sample->id);
|
||||
return tool->callchain_deferred(tool, event, sample,
|
||||
evsel, machine);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
|
||||
struct perf_sample orig_sample;
|
||||
|
||||
ret = evlist__parse_sample(evlist, de->event, &orig_sample);
|
||||
if (ret < 0) {
|
||||
pr_err("failed to parse original sample\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (sample->tid != orig_sample.tid)
|
||||
continue;
|
||||
|
||||
if (event->callchain_deferred.cookie == orig_sample.deferred_cookie)
|
||||
sample__merge_deferred_callchain(&orig_sample, sample);
|
||||
else
|
||||
orig_sample.deferred_callchain = false;
|
||||
|
||||
evsel = evlist__id2evsel(evlist, orig_sample.id);
|
||||
ret = evlist__deliver_sample(evlist, tool, de->event,
|
||||
&orig_sample, evsel, machine);
|
||||
|
||||
if (orig_sample.deferred_callchain)
|
||||
free(orig_sample.callchain);
|
||||
|
||||
list_del(&de->list);
|
||||
free(de->event);
|
||||
free(de);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int machines__deliver_event(struct machines *machines,
|
||||
struct evlist *evlist,
|
||||
union perf_event *event,
|
||||
@@ -1313,6 +1373,22 @@ static int machines__deliver_event(struct machines *machines,
|
||||
return 0;
|
||||
}
|
||||
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
|
||||
if (sample->deferred_callchain && tool->merge_deferred_callchains) {
|
||||
struct deferred_event *de = malloc(sizeof(*de));
|
||||
size_t sz = event->header.size;
|
||||
|
||||
if (de == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
de->event = malloc(sz);
|
||||
if (de->event == NULL) {
|
||||
free(de);
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(de->event, event, sz);
|
||||
list_add_tail(&de->list, &evlist->deferred_samples);
|
||||
return 0;
|
||||
}
|
||||
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
|
||||
case PERF_RECORD_MMAP:
|
||||
return tool->mmap(tool, event, sample, machine);
|
||||
@@ -1372,7 +1448,8 @@ static int machines__deliver_event(struct machines *machines,
|
||||
return tool->aux_output_hw_id(tool, event, sample, machine);
|
||||
case PERF_RECORD_CALLCHAIN_DEFERRED:
|
||||
dump_deferred_callchain(evsel, event, sample);
|
||||
return tool->callchain_deferred(tool, event, sample, evsel, machine);
|
||||
return evlist__deliver_deferred_callchain(evlist, tool, event,
|
||||
sample, machine);
|
||||
default:
|
||||
++evlist->stats.nr_unknown_events;
|
||||
return -1;
|
||||
|
||||
@@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
|
||||
tool->cgroup_events = false;
|
||||
tool->no_warn = false;
|
||||
tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
|
||||
tool->merge_deferred_callchains = true;
|
||||
|
||||
tool->sample = process_event_sample_stub;
|
||||
tool->mmap = process_event_stub;
|
||||
@@ -448,6 +449,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate)
|
||||
tool->tool.cgroup_events = delegate->cgroup_events;
|
||||
tool->tool.no_warn = delegate->no_warn;
|
||||
tool->tool.show_feat_hdr = delegate->show_feat_hdr;
|
||||
tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains;
|
||||
|
||||
tool->tool.sample = delegate_sample;
|
||||
tool->tool.read = delegate_read;
|
||||
|
||||
@@ -90,6 +90,7 @@ struct perf_tool {
|
||||
bool cgroup_events;
|
||||
bool no_warn;
|
||||
bool dont_split_sample_group;
|
||||
bool merge_deferred_callchains;
|
||||
enum show_feature_header show_feat_hdr;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user