From bdd051e249141c793dec28544e7f5d5bc7690bf3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 9 Dec 2025 18:33:24 -0800 Subject: [PATCH] perf record: Split --data-mmap option Currently the -d/--data option controls both the PERF_SAMPLE_ADDR bit and the perf_event_attr.mmap_data flag. Separate them using a new --data-mmap option to support recording only one of them. For data-type profiling, data MMAP is unnecessary and wastes a lot of space in the ring buffer and data file. Committer testing: On an idle system: root@x1:~# perf record -d -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 5.672 MB perf.data (1075 samples) ] root@x1:~# ls -la perf.data -rw-------. 1 root root 5982480 Dec 16 15:34 perf.data root@x1:~# perf evlist -v cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0xa00000000 (cpu_atom/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0x400000000 (cpu_core/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 144, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|ADDR|CPU|IDENTIFIER|DATA_SRC, read_format: ID|LOST, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, build_id: 1 root@x1:~# Now with just --data-mmap we will not save that much, as only ADDR and DATA_SRC will not be enabled in sample_type: root@x1:~# perf record --data-mmap -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 5.576 MB perf.data 
(716 samples) ] root@x1:~# ls -la perf.data -rw-------. 1 root root 5880112 Dec 16 15:37 perf.data root@x1:~# perf evlist -v cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0xa00000000 (cpu_atom/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0x400000000 (cpu_core/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 144, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID|LOST, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, build_id: 1 root@x1:~# To complete, just with DATA_SRC, no mmap_data: root@x1:~# perf record --sample-mem-info -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.407 MB perf.data (1311 samples) ] root@x1:~# ls -la perf.data -rw-------. 
1 root root 1509224 Dec 16 15:40 perf.data root@x1:~# perf evlist -v cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0xa00000000 (cpu_atom/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER|DATA_SRC, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 144, config: 0x400000000 (cpu_core/PERF_COUNT_HW_CPU_CYCLES/), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER|DATA_SRC, read_format: ID|LOST, disabled: 1, freq: 1, precise_ip: 3, sample_id_all: 1 dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 144, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC, read_format: ID|LOST, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, build_id: 1 root@x1:~# Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 8 +++++++- tools/perf/builtin-record.c | 19 +++++++++++++------ tools/perf/util/evsel.c | 5 +++-- tools/perf/util/record.h | 2 ++ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e8b9aadbbfa5..c402e74172f6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -344,7 +344,8 @@ OPTIONS -d:: --data:: - Record the sample virtual addresses. Implies --sample-mem-info. + Record the sample virtual addresses. Implies --sample-mem-info and + --data-mmap. --phys-data:: Record the sample physical addresses. @@ -861,6 +862,11 @@ filtered through the mask provided by -C option. 
Prepare BPF filter to be used by regular users. The action should be either "pin" or "unpin". The filter can be used after it's pinned. +--data-mmap:: + Enable recording MMAP events for non-executable mappings. Basically + perf only records executable mappings, but data mmapping can be useful + when you analyze data access with sample addresses. So using the -d option + would enable this unless you specify --no-data-mmap manually. include::intel-hybrid.txt[] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2584d0d8bc82..cbfbd9bb1063 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1881,7 +1881,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) process_synthesized_event, &rec->session->machines.host, needs_mmap, - rec->opts.sample_address); + rec->opts.record_data_mmap); perf_thread_map__put(thread_map); return err; } @@ -2191,7 +2191,7 @@ static int record__synthesize(struct record *rec, bool tail) err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - f, needs_mmap, opts->sample_address, + f, needs_mmap, opts->record_data_mmap, rec->opts.nr_threads_synthesize); } @@ -3006,8 +3006,9 @@ int record_opts__parse_callchain(struct record_opts *record, ret = parse_callchain_record_opt(arg, callchain); if (!ret) { /* Enable data address sampling for DWARF unwind. */ - if (callchain->record_mode == CALLCHAIN_DWARF) - record->sample_address = true; + if (callchain->record_mode == CALLCHAIN_DWARF && + !record->record_data_mmap_set) + record->record_data_mmap = true; callchain_debug(callchain); } @@ -3686,6 +3687,9 @@ static struct option __record_options[] = { OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). 
(Default: 500ms)", record__parse_off_cpu_thresh), + OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap, + &record.opts.record_data_mmap_set, + "Record mmap events for non-executable mappings"), OPT_END() }; @@ -4249,9 +4253,12 @@ int cmd_record(int argc, const char **argv) goto out_opts; } - /* For backward compatibility, -d implies --mem-info */ - if (rec->opts.sample_address) + /* For backward compatibility, -d implies --mem-info and --data-mmap */ + if (rec->opts.sample_address) { rec->opts.sample_data_src = true; + if (!rec->opts.record_data_mmap_set) + rec->opts.record_data_mmap = true; + } /* * Allow aliases to facilitate the lookup of symbols for address diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9cd706f62793..ec6552a6f667 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1445,10 +1445,11 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->inherit_stat = 1; } - if (opts->sample_address) { + if (opts->sample_address) evsel__set_sample_bit(evsel, ADDR); + + if (opts->record_data_mmap) attr->mmap_data = track; - } /* * We don't allow user space callchains for function trace diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index ea3a6c4657ee..93627c9a7338 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -40,6 +40,8 @@ struct record_opts { bool record_cgroup; bool record_switch_events; bool record_switch_events_set; + bool record_data_mmap; + bool record_data_mmap_set; bool all_kernel; bool all_user; bool kernel_callchains;