perf jevents: Add legacy-hardware and legacy-cache json

The legacy-hardware.json is added containing hardware events similarly
to the software.json file. A difference is that for the software PMU
the name is known and matches sysfs. In the legacy-hardware.json no
Unit/PMU is specified for the events meaning default_core is used and
the events will appear for all core PMUs.

There are potentially 1216 legacy cache events; rather than listing them
in a json file, add a make_legacy_cache.py helper to generate them.

By using json for legacy hardware and cache events: descriptions of
the events can be added; events can be marked as deprecated, such as
those misleadingly named l2 (deprecated is also used to mark all
events that weren't previously displayed in perf list); and the name
lookup becomes case insensitive.

The C string encoding all the perf events and metrics is increased in
size by 123,499 bytes which will increase the perf binary size. Later
changes will remove hard coded event parsing for legacy hardware and
cache events, turning parsing overhead into a binary search during
event lookup.

The event descriptions are based on those in the perf_event_open man
page, credit to Vince Weaver <vincent.weaver@maine.edu>.

Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
Ian Rogers
2025-10-05 11:24:16 -07:00
committed by Namhyung Kim
parent 249a4c6d01
commit 0012e0fa22
4 changed files with 2814 additions and 140 deletions

View File

@@ -12,6 +12,8 @@ PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c
METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log
TEST_EMPTY_PMU_EVENTS_C = $(OUTPUT)pmu-events/test-empty-pmu-events.c
EMPTY_PMU_EVENTS_TEST_LOG = $(OUTPUT)pmu-events/empty-pmu-events.log
LEGACY_CACHE_PY = pmu-events/make_legacy_cache.py
LEGACY_CACHE_JSON = $(OUTPUT)pmu-events/arch/common/common/legacy-cache.json
ifeq ($(JEVENTS_ARCH),)
JEVENTS_ARCH=$(SRCARCH)
@@ -33,7 +35,11 @@ $(OUTPUT)pmu-events/arch/%: pmu-events/arch/%
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)cp $< $@
GEN_JSON = $(patsubst %,$(OUTPUT)%,$(JSON))
$(LEGACY_CACHE_JSON): $(LEGACY_CACHE_PY)
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)$(PYTHON) $(LEGACY_CACHE_PY) > $@
GEN_JSON = $(patsubst %,$(OUTPUT)%,$(JSON)) $(LEGACY_CACHE_JSON)
$(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY)
$(call rule_mkdir)

View File

@@ -0,0 +1,72 @@
[
{
"EventName": "cpu-cycles",
"BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cycles].",
"LegacyConfigCode": "0"
},
{
"EventName": "cycles",
"BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cpu-cycles].",
"LegacyConfigCode": "0"
},
{
"EventName": "instructions",
"BriefDescription": "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.",
"LegacyConfigCode": "1"
},
{
"EventName": "cache-references",
"BriefDescription": "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.",
"LegacyConfigCode": "2"
},
{
"EventName": "cache-misses",
"BriefDescription": "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.",
"LegacyConfigCode": "3"
},
{
"EventName": "branches",
"BriefDescription": "Retired branch instructions [This event is an alias of branch-instructions].",
"LegacyConfigCode": "4"
},
{
"EventName": "branch-instructions",
"BriefDescription": "Retired branch instructions [This event is an alias of branches].",
"LegacyConfigCode": "4"
},
{
"EventName": "branch-misses",
"BriefDescription": "Mispredicted branch instructions.",
"LegacyConfigCode": "5"
},
{
"EventName": "bus-cycles",
"BriefDescription": "Bus cycles, which can be different from total cycles.",
"LegacyConfigCode": "6"
},
{
"EventName": "stalled-cycles-frontend",
"BriefDescription": "Stalled cycles during issue [This event is an alias of idle-cycles-frontend].",
"LegacyConfigCode": "7"
},
{
"EventName": "idle-cycles-frontend",
"BriefDescription": "Stalled cycles during issue [This event is an alias of stalled-cycles-frontend].",
"LegacyConfigCode": "7"
},
{
"EventName": "stalled-cycles-backend",
"BriefDescription": "Stalled cycles during retirement [This event is an alias of idle-cycles-backend].",
"LegacyConfigCode": "8"
},
{
"EventName": "idle-cycles-backend",
"BriefDescription": "Stalled cycles during retirement [This event is an alias of stalled-cycles-backend].",
"LegacyConfigCode": "8"
},
{
"EventName": "ref-cycles",
"BriefDescription": "Total cycles; not affected by CPU frequency scaling.",
"LegacyConfigCode": "9"
}
]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,129 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
import json
# Legacy cache kinds. Every entry is a 4-tuple:
#   (cache id,
#    accepted spellings of the cache name; only the first one can end up
#    non-deprecated,
#    ids of the cache ops for which "<name>-<op>..." events are generated,
#    text that starts each event description)
hw_cache_id = [
    (
        0,  # PERF_COUNT_HW_CACHE_L1D
        ["L1-dcache", "l1-d", "l1d", "L1-data"],
        [0, 1, 2],  # read, write, prefetch
        "Level 1 data cache",
    ),
    (
        1,  # PERF_COUNT_HW_CACHE_L1I
        ["L1-icache", "l1-i", "l1i", "L1-instruction"],
        [0, 2],  # read, prefetch
        "Level 1 instruction cache",
    ),
    (
        2,  # PERF_COUNT_HW_CACHE_LL
        ["LLC", "L2"],
        [0, 1, 2],  # read, write, prefetch
        "Last level cache",
    ),
    (
        3,  # PERF_COUNT_HW_CACHE_DTLB
        ["dTLB", "d-tlb", "Data-TLB"],
        [0, 1, 2],  # read, write, prefetch
        "Data TLB",
    ),
    (
        4,  # PERF_COUNT_HW_CACHE_ITLB
        ["iTLB", "i-tlb", "Instruction-TLB"],
        [0],  # read
        "Instruction TLB",
    ),
    (
        5,  # PERF_COUNT_HW_CACHE_BPU
        ["branch", "branches", "bpu", "btb", "bpc"],
        [0],  # read
        "Branch prediction unit",
    ),
    (
        6,  # PERF_COUNT_HW_CACHE_NODE
        ["node"],
        [0, 1, 2],  # read, write, prefetch
        "Local memory",
    ),
]
# Cache operations. Every entry is a 3-tuple:
#   (op id, accepted spellings of the op in an event name,
#    word used for the op in event descriptions)
hw_cache_op = [
    (
        0,  # PERF_COUNT_HW_CACHE_OP_READ
        ["load", "loads", "read"],
        "read",
    ),
    (
        1,  # PERF_COUNT_HW_CACHE_OP_WRITE
        ["store", "stores", "write"],
        "write",
    ),
    (
        2,  # PERF_COUNT_HW_CACHE_OP_PREFETCH
        ["prefetch", "prefetches", "speculative-read", "speculative-load"],
        "prefetch",
    ),
]
# Cache results. Every entry is a 3-tuple:
#   (result id, accepted spellings of the result in an event name,
#    word used for the result in event descriptions)
hw_cache_result = [
    (
        0,  # PERF_COUNT_HW_CACHE_RESULT_ACCESS
        ["refs", "Reference", "ops", "access"],
        "accesses",
    ),
    (
        1,  # PERF_COUNT_HW_CACHE_RESULT_MISS
        ["misses", "miss"],
        "misses",
    ),
]
# Every generated event, in generation order; dumped as JSON at the end of
# the script.
events = []


def add_event(name: str,
              cache_id: int, cache_op: int, cache_result: int,
              desc: str,
              deprecated: bool) -> None:
    """Append one legacy cache event to the module-level ``events`` list.

    Nothing is appended when ``name`` is "branch-misses" or "branches".

    Args:
        name: Value stored as the event's "EventName".
        cache_id: Cache id, placed in bits 0-7 of the config code.
        cache_op: Cache op id, shifted left by 8 in the config code.
        cache_result: Cache result id, shifted left by 16 in the config code.
        desc: Value stored as "BriefDescription"; for names starting with
            "L2" the text "Last level cache" is reworded first.
        deprecated: When true the event gets "Deprecated": "1". Names
            starting with "L2" are always treated as deprecated.
    """
    # Avoid conflicts with PERF_TYPE_HARDWARE events which are higher priority.
    if name in ("branch-misses", "branches"):
        return

    # Tweak and deprecate L2 named events: the name is actively confusing
    # as on many machines it actually means the L3 cache.
    if name.startswith("L2"):
        desc = desc.replace("Last level cache", "Level 2 (or higher) last level cache")
        deprecated = True

    config = cache_id | (cache_op << 8) | (cache_result << 16)
    event = {
        "EventName": name,
        "BriefDescription": desc,
        "LegacyCacheCode": f"0x{config:06x}",
    }
    if deprecated:
        event["Deprecated"] = "1"
    events.append(event)
# Generate every spelling of every legacy cache event, in this order for each
# cache name: "<cache>", then for each supported op spelling "<cache>-<op>"
# followed by all "<cache>-<op>-<result>" forms, and finally the
# "<cache>-<result>" forms.
for cache_id, names, ops, cache_desc in hw_cache_id:
    for name in names:
        # Anything built from a spelling other than the first cache name is
        # deprecated, whatever the op/result spelling.
        is_alias = name != names[0]

        # A bare cache name is a (deprecated) read-access event.
        add_event(name,
                  cache_id,
                  0,  # PERF_COUNT_HW_CACHE_OP_READ
                  0,  # PERF_COUNT_HW_CACHE_RESULT_ACCESS
                  f"{cache_desc} read accesses.",
                  deprecated=True)

        for op, op_names, op_desc in hw_cache_op:
            if op not in ops:
                continue
            for op_name in op_names:
                # "<cache>-<op>": only the second op spelling is kept
                # non-deprecated, e.g. "L1-dcache-loads".
                deprecated = is_alias or op_name != op_names[1]
                add_event(f"{name}-{op_name}",
                          cache_id,
                          op,
                          0,  # PERF_COUNT_HW_CACHE_RESULT_ACCESS
                          f"{cache_desc} {op_desc} accesses.",
                          deprecated)

                for result, result_names, result_desc in hw_cache_result:
                    for result_name in result_names:
                        # "<cache>-<op>-<result>": only the first op
                        # spelling with the first spelling of a non-access
                        # result is kept non-deprecated, e.g.
                        # "L1-dcache-load-misses".
                        deprecated = (is_alias or
                                      op_name != op_names[0] or
                                      result == 0 or
                                      result_name != result_names[0])
                        add_event(f"{name}-{op_name}-{result_name}",
                                  cache_id, op, result,
                                  f"{cache_desc} {op_desc} {result_desc}.",
                                  deprecated)

        # "<cache>-<result>" with no op means a read; always deprecated.
        for result, result_names, result_desc in hw_cache_result:
            for result_name in result_names:
                add_event(f"{name}-{result_name}",
                          cache_id,
                          0,  # PERF_COUNT_HW_CACHE_OP_READ
                          result,
                          f"{cache_desc} read {result_desc}.",
                          deprecated=True)

# The generated JSON goes to stdout.
print(json.dumps(events, indent=2))