mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 10:01:39 -05:00
Add --rb-overwrite option to benchmark BPF ring buffer in overwrite mode. Since overwrite mode is not yet supported by libbpf for consumer, also add --rb-bench-producer option to benchmark producer directly without a consumer. Benchmarks on an x86_64 and an arm64 CPU are shown below for reference. - AMD EPYC 9654 (x86_64) Ringbuf, multi-producer contention in overwrite mode, no consumer ================================================================= rb-prod nr_prod 1 32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 20 2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s) - HiSilicon Kunpeng 920 (arm64) Ringbuf, multi-producer contention in overwrite mode, no consumer ================================================================= rb-prod nr_prod 1 14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 3.509 ± 0.021M/s (drops 
0.000 ± 0.000M/s) rb-prod nr_prod 20 3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s) Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20251018035738.4039621-4-xukuohai@huaweicloud.com
56 lines
2.2 KiB
Bash
Executable File
56 lines
2.2 KiB
Bash
Executable File
#!/bin/bash

# Load the shared benchmark helpers.
# NOTE(review): header, summarize and RUN_BENCH are used by this script but
# not defined in it, so they are presumably provided by run_common.sh - confirm.
source ./benchs/run_common.sh

# Strict mode: exit on error (-e), treat unset variables as errors (-u),
# disable pathname expansion (-f), and fail a pipeline if any stage fails.
set -eufo pipefail

# Base command line for the runs below: RUN_BENCH with -c1 appended.
# NOTE(review): -c1 presumably selects a single consumer - confirm against the
# bench tool's options.
# This is a plain string on purpose: callers expand it unquoted so that it
# word-splits into the command and its arguments (globbing is off via -f).
RUN_RB_BENCH="$RUN_BENCH -c1"

# Run every benchmark in the list once with only the base options and
# summarize its output under the benchmark's own name.
header "Single-producer, parallel producer"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "$b" "$($RUN_RB_BENCH "$b")"
done

# Same benchmark list as the plain run, with --rb-sampled added to each
# invocation.
header "Single-producer, parallel producer, sampled notification"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "$b" "$($RUN_RB_BENCH --rb-sampled "$b")"
done

# For each benchmark, report two results: one run with --rb-b2b only, and
# one with --rb-sampled added (labelled "<name>-sampled").
header "Single-producer, back-to-back mode"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "$b" "$($RUN_RB_BENCH --rb-b2b "$b")"
	summarize "${b}-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-b2b "$b")"
done

# Sweep over a list of values with rb-custom; each value is passed as both
# --rb-batch-cnt and --rb-sample-rate, and is used in the result label.
header "Ringbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt "$b" --rb-sampled --rb-sample-rate "$b" rb-custom)"
done
# Sweep over a list of values with pb-custom; each value is passed as both
# --rb-batch-cnt and --rb-sample-rate, and is used in the result label.
header "Perfbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt "$b" --rb-sampled --rb-sample-rate "$b" pb-custom)"
done

# Two rb-custom runs in --rb-b2b mode that differ only in --rb-use-output:
# "reserve" omits the flag, "output" passes it.
# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
header "Ringbuf back-to-back, reserve+commit vs output"
summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)"
summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)"

# Two rb-custom runs with --rb-sampled that differ only in --rb-use-output:
# "reserve-sampled" omits the flag, "output-sampled" passes it.
# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
header "Ringbuf sampled, reserve+commit vs output"
summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)"
summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)"

# Run every benchmark in the list with batch count 1 and sample rate 1, and
# with both --prod-affinity and --cons-affinity set to 0.
header "Single-producer, consumer/producer competing on the same CPU, low batch count"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "$b" "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 "$b")"
done

# Sweep the value passed to -p for rb-libbpf with a fixed --rb-batch-cnt of
# 50; the value also appears in the result label as "nr_prod".
header "Ringbuf, multi-producer contention"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
	# $RUN_RB_BENCH stays unquoted so it splits into command + arguments.
	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p"$b" --rb-batch-cnt 50 rb-libbpf)"
done

# Sweep the value passed to -p for rb-libbpf with --rb-overwrite and
# --rb-bench-producer and a fixed --rb-batch-cnt of 50.
# This section invokes $RUN_BENCH directly, so the -c1 option carried by
# $RUN_RB_BENCH is not passed here (the header states "no consumer").
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
	# $RUN_BENCH stays unquoted so it splits into command + arguments.
	summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p"$b" --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
done