mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-05 18:55:42 -05:00
Pull perf tools updates from Namhyung Kim:
"perf record:
- Enable leader sampling for inherited task events. It was supported
only for system-wide events but the kernel started to support such
a setup since v6.12.
This is to reduce the number of PMU interrupts. The samples of the
leader event will contain counts of other events and no samples
will be generated for the other member events.
$ perf record -e '{cycles,instructions}:S' ${MYPROG}
perf report:
- Fix --branch-history option to display more branch-related
information like prediction, abort and cycles which is available
on Intel machines.
$ perf record -bg -- perf test -w brstack
$ perf report --branch-history
...
#
# Overhead Source:Line Symbol Shared Object Predicted Abort Cycles IPC [IPC Coverage]
# ........ ........................ .............. .................... ......... ..... ...... ....................
#
8.17% copy_page_64.S:19 [k] copy_page [kernel.kallsyms] 50.0% 0 5 - -
|
---xas_load xarray.h:171
|
|--5.68%--xas_load xarray.c:245 (cycles:1)
| xas_load xarray.c:242
| xas_load xarray.h:1260 (cycles:1)
| xas_descend xarray.c:146
| xas_load xarray.c:244 (cycles:2)
| xas_load xarray.c:245
| xas_descend xarray.c:218 (cycles:10)
...
perf stat:
- Add HWMON PMU support.
The HWMON provides various system information like CPU/GPU
temperature, fan speed and so on. Expose them as PMU events so that
users can see the values using perf stat commands.
$ perf stat -e temp_cpu,fan1 true
Performance counter stats for 'true':
60.00 'C temp_cpu
0 rpm fan1
0.000745382 seconds time elapsed
0.000883000 seconds user
0.000000000 seconds sys
- Display metric threshold in JSON output.
Some metrics define thresholds to classify value ranges. It used to
be in a different color but it won't work for JSON.
Add "metric-threshold" field to the JSON that can be one of "good",
"less good", "nearly bad" and "bad".
# perf stat -a -M TopdownL1 -j true
{"counter-value" : "18693525.000000", "unit" : "", "event" : "TOPDOWN.SLOTS", "event-runtime" : 5552708, "pcnt-running" : 100.00, "metric-value" : "43.226002", "metric-unit" : "% tma_backend_bound", "metric-threshold" : "bad"}
{"metric-value" : "29.212267", "metric-unit" : "% tma_frontend_bound", "metric-threshold" : "bad"}
{"metric-value" : "7.138972", "metric-unit" : "% tma_bad_speculation", "metric-threshold" : "good"}
{"metric-value" : "20.422759", "metric-unit" : "% tma_retiring", "metric-threshold" : "good"}
{"counter-value" : "3817732.000000", "unit" : "", "event" : "topdown-retiring", "event-runtime" : 5552708, "pcnt-running" : 100.00, }
{"counter-value" : "5472824.000000", "unit" : "", "event" : "topdown-fe-bound", "event-runtime" : 5552708, "pcnt-running" : 100.00, }
{"counter-value" : "7984780.000000", "unit" : "", "event" : "topdown-be-bound", "event-runtime" : 5552708, "pcnt-running" : 100.00, }
{"counter-value" : "1418181.000000", "unit" : "", "event" : "topdown-bad-spec", "event-runtime" : 5552708, "pcnt-running" : 100.00, }
...
perf sched:
- Add -P/--pre-migrations option for 'timehist' sub-command to track
time a task waited on a run-queue before migrating to a different
CPU.
$ perf sched timehist -P
time cpu task name wait time sch delay run time pre-mig time
[tid/pid] (msec) (msec) (msec) (msec)
--------------- ------ ------------------------------ --------- --------- --------- ---------
585940.535527 [0000] perf[584885] 0.000 0.000 0.000 0.000
585940.535535 [0000] migration/0[20] 0.000 0.002 0.008 0.000
585940.535559 [0001] perf[584885] 0.000 0.000 0.000 0.000
585940.535563 [0001] migration/1[25] 0.000 0.001 0.004 0.000
585940.535678 [0002] perf[584885] 0.000 0.000 0.000 0.000
585940.535686 [0002] migration/2[31] 0.000 0.002 0.008 0.000
585940.535905 [0001] <idle> 0.000 0.000 0.342 0.000
585940.535938 [0003] perf[584885] 0.000 0.000 0.000 0.000
585940.537048 [0001] sleep[584886] 0.000 0.019 1.142 0.001
585940.537749 [0002] <idle> 0.000 0.000 2.062 0.000
...
Build:
- Make libunwind opt-in (LIBUNWIND=1) rather than opt-out.
The perf tools are generally built with libelf and libdw, which has
unwinder functionality. The libunwind support predates it, and there is
no need to have duplicate unwinders by default.
- Rename NO_DWARF=1 build option to NO_LIBDW=1 in order to clarify
it's using libdw for handling DWARF information.
Internals:
- Do not set exclude_guest bit in the perf_event_attr by default.
This was causing trouble with the AMD IBS PMU as it doesn't support the
bit. The bit will be set when it's needed later by the fallback
logic. Also update the missing feature detection logic to make sure
it does not clear supported bits unnecessarily.
- Run perf test in parallel by default and mark flaky tests
"exclusive" to run them serially at the end. Some test numbers are
changed but the test can complete in less than half the time.
JSON vendor events:
- Add AMD Zen 5 events and metrics.
- Add i.MX91 and i.MX95 DDR metrics
- Fix HiSilicon HIP08 Topdown metric name.
- Support compat events on PowerPC"
* tag 'perf-tools-for-v6.13-2024-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (232 commits)
perf tests: Fix hwmon parsing with PMU name test
perf hwmon_pmu: Ensure hwmon key union is zeroed before use
perf tests hwmon_pmu: Remove double evlist__delete()
perf/test: fix perf ftrace test on s390
perf bpf-filter: Return -ENOMEM directly when pfi allocation fails
perf test: Correct hwmon test PMU detection
perf: Remove unused del_perf_probe_events()
perf pmu: Move pmu_metrics_table__find and remove ARM override
perf jevents: Add map_for_cpu()
perf header: Pass a perf_cpu rather than a PMU to get_cpuid_str
perf header: Avoid transitive PMU includes
perf arm64 header: Use cpu argument in get_cpuid
perf header: Refactor get_cpuid to take a CPU for ARM
perf header: Move is_cpu_online to numa bench
perf jevents: fix breakage when do perf stat on system metric
perf test: Add missing __exit calls in tool/hwmon tests
perf tests: Make leader sampling test work without branch event
perf util: Remove kernel version deadcode
perf test shell trace_exit_race: Use --no-comm to avoid cases where COMM isn't resolved
perf test shell trace_exit_race: Show what went wrong in verbose mode
...
308 lines
9.9 KiB
Gherkin
308 lines
9.9 KiB
Gherkin
# SPDX-License-Identifier: GPL-2.0-only
|
|
# Directory the feature checks are built from (passed to $(MAKE) -C by
# feature_check).
feature_dir := $(srctree)/tools/build/feature

# When an output directory is given, the feature-test binaries go into
# $(OUTPUT)feature/, which is created here while the makefile is parsed.
ifneq (,$(OUTPUT))
  OUTPUT_FEATURES = $(OUTPUT)feature/
  $(shell mkdir -p $(OUTPUT_FEATURES))
endif
|
|
|
|
# $(call feature_check,<name>)
#
# Build the feature test '$(OUTPUT_FEATURES)test-<name>.bin' with a sub-make
# in $(feature_dir), discarding all of its output, and set feature-<name>
# to 1 when that build succeeds or to 0 when it fails.  Per-feature flags
# are taken from FEATURE_CHECK_CFLAGS-<name>, FEATURE_CHECK_CXXFLAGS-<name>
# and FEATURE_CHECK_LDFLAGS-<name>.
define feature_check_code
  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$(1).bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef

feature_check = $(eval $(feature_check_code))
|
|
|
|
# $(call feature_set,<name>)
#
# Mark feature <name> as available (feature-<name> := 1) without running
# its test.
define feature_set_code
  feature-$(1) := 1
endef

feature_set = $(eval $(feature_set_code))
|
|
|
|
#
|
|
# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
|
|
#
|
|
|
|
#
|
|
# Note that this is not a complete list of all feature tests, just
|
|
# those that are typically built on a fully configured system.
|
|
#
|
|
# [ Feature tests not mentioned here have to be built explicitly in
|
|
# the rule that uses them - an example for that is the 'bionic'
|
|
# feature check. ]
|
|
#
|
|
FEATURE_TESTS_BASIC :=                  \
        backtrace                       \
        libdw                           \
        eventfd                         \
        fortify-source                  \
        get_current_dir_name            \
        gettid                          \
        glibc                           \
        libbfd                          \
        libbfd-buildid                  \
        libcap                          \
        libelf                          \
        libelf-getphdrnum               \
        libelf-gelf_getnote             \
        libelf-getshdrstrndx            \
        libnuma                         \
        numa_num_possible_cpus          \
        libperl                         \
        libpython                       \
        libslang                        \
        libslang-include-subdir         \
        libtraceevent                   \
        libtracefs                      \
        libcpupower                     \
        libcrypto                       \
        libunwind                       \
        pthread-attr-setaffinity-np     \
        pthread-barrier                 \
        reallocarray                    \
        stackprotector-all              \
        timerfd                         \
        zlib                            \
        lzma                            \
        get_cpuid                       \
        bpf                             \
        scandirat                       \
        sched_getcpu                    \
        sdt                             \
        setns                           \
        libaio                          \
        libzstd                         \
        disassembler-four-args          \
        disassembler-init-styled        \
        file-handle
|
|
|
|
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
|
|
# of all feature tests
|
|
FEATURE_TESTS_EXTRA :=                          \
        bionic                                  \
        compile-32                              \
        compile-x32                             \
        cplus-demangle                          \
        cxa-demangle                            \
        gtk2                                    \
        gtk2-infobar                            \
        hello                                   \
        libbabeltrace                           \
        libcapstone                             \
        libbfd-liberty                          \
        libbfd-liberty-z                        \
        libopencsd                              \
        libunwind-x86                           \
        libunwind-x86_64                        \
        libunwind-arm                           \
        libunwind-aarch64                       \
        libunwind-debug-frame                   \
        libunwind-debug-frame-arm               \
        libunwind-debug-frame-aarch64           \
        cxx                                     \
        llvm                                    \
        clang                                   \
        libbpf                                  \
        libbpf-btf__load_from_kernel_by_id      \
        libbpf-bpf_prog_load                    \
        libbpf-bpf_object__next_program         \
        libbpf-bpf_object__next_map             \
        libbpf-bpf_program__set_insns           \
        libbpf-bpf_create_map                   \
        libpfm4                                 \
        libdebuginfod                           \
        clang-bpf-co-re
|
|
|
|
|
|
# Use the basic set unless a list of feature tests was already chosen.
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)

# The literal value 'all' selects every known feature test.
ifeq (all,$(FEATURE_TESTS))
  FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
endif
|
|
|
|
FEATURE_DISPLAY ?=                      \
        libdw                           \
        glibc                           \
        libbfd                          \
        libbfd-buildid                  \
        libcap                          \
        libelf                          \
        libnuma                         \
        numa_num_possible_cpus          \
        libperl                         \
        libpython                       \
        libcrypto                       \
        libunwind                       \
        libcapstone                     \
        llvm-perf                       \
        zlib                            \
        lzma                            \
        get_cpuid                       \
        bpf                             \
        libaio                          \
        libzstd
|
|
|
|
#
# Group members of a feature: the status displayed for the feature is the
# logical OR of the detection results of the group, instead of one line
# per member.
#
FEATURE_GROUP_MEMBERS-libbfd = libbfd-liberty libbfd-liberty-z

#
# Feature dependency packages that provide pkg-config files.
#
FEATURE_PKG_CONFIG ?=                   \
        libtraceevent                   \
        libtracefs
|
|
|
|
# $(call feature_pkg_config,<package>)
#
# Query $(PKG_CONFIG) for the compile and link flags of <package> and store
# them in FEATURE_CHECK_CFLAGS-<package> and FEATURE_CHECK_LDFLAGS-<package>.
# Error output of the query is discarded.
define feature_pkg_config_code
  FEATURE_CHECK_CFLAGS-$(1) := $(shell $(PKG_CONFIG) --cflags $(1) 2>/dev/null)
  FEATURE_CHECK_LDFLAGS-$(1) := $(shell $(PKG_CONFIG) --libs $(1) 2>/dev/null)
endef

feature_pkg_config = $(eval $(feature_pkg_config_code))

# Set FEATURE_CHECK_(C|LD)FLAGS-$(package) for packages using pkg-config,
# but only when a pkg-config command is configured.
ifneq (,$(PKG_CONFIG))
  $(foreach package,$(FEATURE_PKG_CONFIG),$(call feature_pkg_config,$(package)))
endif
|
|
|
|
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
# mentioned in this list we need to refactor this ;-).
#
# $(call set_test_all_flags,<name>) appends the per-feature check flags of
# <name> to FEATURE_CHECK_CFLAGS-all and FEATURE_CHECK_LDFLAGS-all.
define set_test_all_flags_code
  FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1))
  FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1))
endef

set_test_all_flags = $(eval $(set_test_all_flags_code))

$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
|
|
|
|
#
# Special fast-path for the 'all features are available' case: build the
# single 'all' feature test first.
#
# NOTE(review): feature_check_code only references its first argument, so
# the $(MSG) argument appears to be unused here.
#
$(call feature_check,all,$(MSG))

ifeq ($(feature-all), 1)
  #
  # test-all.c passed - just set all the core feature flags to 1:
  #
  $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
  #
  # test-all.c does not comprise the tests below, so they still have to
  # be checked one by one in this case to give these features proper
  # values:
  #
  $(foreach feat,compile-32 compile-x32 bionic libbabeltrace,$(call feature_check,$(feat)))
else
  #
  # The combined test failed - check every requested feature on its own:
  #
  $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
endif
|
|
|
|
#
# Print the result of the feature test:
#
# $(call feature_print_status,<name>) sets MSG to a status line for <name>:
# a green "on" when <name> - or, unless feature_verbose is 1, any member of
# FEATURE_GROUP_MEMBERS-<name> - has its feature-* variable set to 1, and a
# red "OFF" otherwise.  <name> is right-aligned in a 40-column field.
#
feature_print_status = $(eval $(feature_print_status_code))

#
# $(call feature_group,<name>) expands to <name> followed, unless
# feature_verbose is 1, by the members of FEATURE_GROUP_MEMBERS-<name>.
# The list is built in the GROUP variable by feature_gen_group.
#
feature_group = $(eval $(feature_gen_group)) $(GROUP)

define feature_gen_group
  GROUP := $(1)
  ifneq ($(feature_verbose),1)
    GROUP += $(FEATURE_GROUP_MEMBERS-$(1))
  endif
endef

#
# The group is looked up through the template's own argument $(1) and not
# through a 'feat' loop variable of the caller, so the helper also works
# when it is called outside of a $(foreach feat,...) loop.
#
define feature_print_status_code
  ifneq (,$(filter 1,$(foreach feat,$(call feature_group,$(1)),$(feature-$(feat)))))
    MSG = $(shell printf '...%40s: [ \033[32mon\033[m ]' $(1))
  else
    MSG = $(shell printf '...%40s: [ \033[31mOFF\033[m ]' $(1))
  endif
endef
|
|
|
|
# $(call feature_print_text,<label>,<text>)
#
# Set MSG to "...<label>: <text>", with <label> right-aligned in a
# 40-column field (same layout as the feature status lines).
define feature_print_text_code
  MSG = $(shell printf '...%40s: %s' $(1) $(2))
endef

feature_print_text = $(eval $(feature_print_text_code))
|
|
|
|
#
# Generate the "name=value" record of a feature, for example:
#   $(call feature_assign,libdw) == feature-libdw=1
#
feature_assign = feature-$(1)=$(feature-$(1))

# File holding the feature records written by a previous run.
FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER)

# Current contents of that file; 'touch' creates it first when it is missing.
FEATURE_DUMP := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
|
|
|
|
# $(call feature_dump_check,<record>,<flag>)
#
# Set the variable <flag> to 1 when <record> (a "feature-<name>=<value>"
# string) is not one of the records stored in $(FEATURE_DUMP).
#
# Records are compared as whole words.  A substring search would treat a
# record with an empty value ("feature-x=") as present whenever the dump
# contains "feature-x=1" or "feature-x=0", hiding that change.
define feature_dump_check_code
  ifeq ($(filter $(1),$(FEATURE_DUMP)),)
    $(2) := 1
  endif
endef

feature_dump_check = $(eval $(feature_dump_check_code))
|
|
|
|
#
# First check whether the record of any FEATURE_DISPLAY feature is missing
# from the stored FEATURE-DUMP, and set feature_display := 1 if so.
#
$(foreach feat,$(FEATURE_DISPLAY),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_display))

#
# Now run the same check for every feature test; a missing record sets
# feature_dump_changed := 1, which forces FEATURE-DUMP generation below.
#
$(foreach feat,$(FEATURE_TESTS),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_dump_changed))

# The $(feature_display) variable controls the default detection message
# output.  It's set if:
#   - the detected features differ from the features stored by the
#     last build (in the $(FEATURE_DUMP_FILENAME) file)
#   - one of the $(FEATURE_DISPLAY) features is not detected
#   - VF is enabled

# Rewrite the dump from scratch, one record per line.
ifeq (1,$(feature_dump_changed))
  $(shell rm -f $(FEATURE_DUMP_FILENAME))
  $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
endif
|
|
|
|
# $(call feature_display_check,<name>)
#
# Request the feature display (feature_display := 1) when feature-<name>
# is not 1.
define feature_check_display_code
  ifneq ($(feature-$(1)), 1)
    feature_display := 1
  endif
endef

feature_display_check = $(eval $(feature_check_display_code))

$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat)))

# VF=1 forces the feature display and switches on verbose mode.
ifeq (1,$(VF))
  feature_display := 1
  feature_verbose := 1
endif
|
|
|
|
ifneq (1,$(feature_verbose))
  #
  # Outside of verbose mode the group members of the displayed features are
  # left out of the message: only the logical OR of the detection results
  # is shown, on the line of the feature they belong to.
  #
  FEATURE_OMIT := $(foreach feat,$(FEATURE_DISPLAY),$(FEATURE_GROUP_MEMBERS-$(feat)))
endif
|
|
|
|
# $(call feature_display_entries)
#
# Print the feature detection summary with $(info):
#   - when feature_display is 1, a header followed by one status line for
#     each FEATURE_DISPLAY feature that is not listed in FEATURE_OMIT;
#   - when feature_verbose is 1, additionally one status line for each
#     FEATURE_TESTS feature that is not part of FEATURE_DISPLAY.
# The $$(info ...) calls are escaped so that they run when the generated
# text is evaluated, i.e. only inside the conditional that is taken.
define feature_display_entries_code
  ifeq ($(feature_display),1)
    $$(info )
    $$(info Auto-detecting system features:)
    $(foreach feat,$(filter-out $(FEATURE_OMIT),$(FEATURE_DISPLAY)),$(call feature_print_status,$(feat),) $$(info $(MSG)))
  endif

  ifeq ($(feature_verbose),1)
    $(eval TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)))
    $(foreach feat,$(TMP),$(call feature_print_status,$(feat),) $$(info $(MSG)))
  endif
endef

feature_display_entries = $(eval $(feature_display_entries_code))

# Print the summary right away unless FEATURE_DISPLAY_DEFERRED is set.
ifeq ($(FEATURE_DISPLAY_DEFERRED),)
  $(call feature_display_entries)
  $(info )
endif
|