delaytop: add psi info to show system delay

Support showing the system-wide delay by reading PSI, similar to the
summary lines printed at the top of the top command's output.  The output
of delaytop now includes both the system-wide delay and the delay of
individual tasks, giving a more comprehensive view of system latency.

Use case
========
bash# ./delaytop
System Pressure Information: (avg10/avg60/avg300/total)
CPU:    full:    0.0%/   0.0%/   0.0%/0           some:    0.1%/   0.0%/   0.0%/14216596
Memory: full:    0.0%/   0.0%/   0.0%/34010659    some:    0.0%/   0.0%/   0.0%/35406492
IO:     full:    0.1%/   0.0%/   0.0%/51029453    some:    0.1%/   0.0%/   0.0%/55330465
IRQ:    full:    0.0%/   0.0%/   0.0%/0

Top 20 processes (sorted by CPU delay):

  PID   TGID  COMMAND            CPU(ms)  IO(ms)        SWAP(ms) RCL(ms) THR(ms)  CMP(ms)  WP(ms)  IRQ(ms)
---------------------------------------------------------------------------------------------
   32     32  kworker/2:0H-sy   23.65     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  497    497  kworker/R-scsi_    1.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  495    495  kworker/R-scsi_    1.13     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  494    494  scsi_eh_0          1.12     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  485    485  kworker/R-ata_s    0.90     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  574    574  kworker/R-kdmfl    0.36     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   34     34  idle_inject/3      0.33     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1123   1123  nde-netfilter      0.28     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   60     60  ksoftirqd/7        0.25     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  114    114  kworker/0:2-cgr    0.25     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  496    496  scsi_eh_1          0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   51     51  cpuhp/6            0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1667   1667  atd                0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   45     45  cpuhp/5            0.23     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1102   1102  nde-backupservi    0.22     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1098   1098  systemsettings     0.21     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1100   1100  audit-monitor      0.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   53     53  migration/6        0.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1482   1482  sshd               0.19     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   39     39  cpuhp/4            0.19     0.00     0.00     0.00    0.00     0.00     0.00     0.00

Link: https://lkml.kernel.org/r/20250710135451340_5pOgpIFi0M5AE7H44W1D@zte.com.cn
Co-developed-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Signed-off-by: Jiang Kun <jiang.kun2@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: Qiang Tu <tu.qiang35@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Wang Yaxin
2025-07-10 13:54:51 +08:00
committed by Andrew Morton
parent 599579e857
commit 6b47c9f8ee

View File

@@ -10,9 +10,9 @@
* individual tasks (PIDs).
*
* Key features:
* - Collects per-task delay accounting statistics via taskstats.
* - Supports sorting, filtering.
* - Supports both interactive (screen refresh).
* - Collects per-task delay accounting statistics via taskstats.
* - Supports sorting, filtering.
* - Supports both interactive (screen refresh) and one-shot (--once) modes.
*
* Copyright (C) Fan Yu, ZTE Corp. 2025
* Copyright (C) Wang Yaxin, ZTE Corp. 2025
@@ -43,6 +43,14 @@
#include <linux/cgroupstats.h>
#include <ncurses.h>
/*
 * procfs paths of the PSI (Pressure Stall Information) files.
 *
 * The *_SOME and *_FULL macros of one resource expand to the same path:
 * a single pressure file carries both the "some" and the "full" record,
 * one per line, and read_psi_stats() picks them apart by line prefix.
 * Only a "full" record is read from the IRQ file.
 */
#define PSI_CPU_SOME "/proc/pressure/cpu"
#define PSI_CPU_FULL "/proc/pressure/cpu"
#define PSI_MEMORY_SOME "/proc/pressure/memory"
#define PSI_MEMORY_FULL "/proc/pressure/memory"
#define PSI_IO_SOME "/proc/pressure/io"
#define PSI_IO_FULL "/proc/pressure/io"
#define PSI_IRQ_FULL "/proc/pressure/irq"
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
@@ -66,6 +74,24 @@ struct config {
char *container_path; /* Path to container cgroup */
};
/*
 * Snapshot of the system-wide PSI (Pressure Stall Information) records.
 *
 * One group of four fields exists per (resource, record kind) pair:
 *   <res>_<kind>_avg10/avg60/avg300  values of the "avg10=", "avg60=" and
 *                                    "avg300=" keys, displayed as percentages
 *   <res>_<kind>_total               value of the "total=" key
 *
 * IRQ pressure has only a "full" group.
 */
struct psi_stats {
	/* CPU, "some" record */
	double cpu_some_avg10;
	double cpu_some_avg60;
	double cpu_some_avg300;
	unsigned long long cpu_some_total;

	/* CPU, "full" record */
	double cpu_full_avg10;
	double cpu_full_avg60;
	double cpu_full_avg300;
	unsigned long long cpu_full_total;

	/* Memory, "some" record */
	double memory_some_avg10;
	double memory_some_avg60;
	double memory_some_avg300;
	unsigned long long memory_some_total;

	/* Memory, "full" record */
	double memory_full_avg10;
	double memory_full_avg60;
	double memory_full_avg300;
	unsigned long long memory_full_total;

	/* IO, "some" record */
	double io_some_avg10;
	double io_some_avg60;
	double io_some_avg300;
	unsigned long long io_some_total;

	/* IO, "full" record */
	double io_full_avg10;
	double io_full_avg60;
	double io_full_avg300;
	unsigned long long io_full_total;

	/* IRQ, "full" record (the only kind read for IRQ) */
	double irq_full_avg10;
	double irq_full_avg60;
	double irq_full_avg300;
	unsigned long long irq_full_total;
};
/* Task delay information structure */
struct task_info {
int pid;
@@ -100,6 +126,7 @@ struct container_stats {
/* Global variables */
static struct config cfg; /* run-time options (e.g. container_path, max_processes) */
static struct psi_stats psi; /* latest PSI snapshot; zeroed and refilled by read_psi_stats() */
static struct task_info tasks[MAX_TASKS]; /* per-task delay records, capacity MAX_TASKS */
static int task_count; /* presumably the number of valid entries in tasks[] - confirm */
static int running = 1; /* main loop keeps iterating while this is non-zero */
@@ -130,13 +157,13 @@ static void usage(void)
{
printf("Usage: delaytop [Options]\n"
"Options:\n"
" -h, --help Show this help message and exit\n"
" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
" -o, --once Display once and exit\n"
" -p, --pid=PID Monitor only the specified PID\n"
" -C, --container=PATH Monitor the container at specified cgroup path\n");
" -h, --help Show this help message and exit\n"
" -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
" -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
" -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
" -o, --once Display once and exit\n"
" -p, --pid=PID Monitor only the specified PID\n"
" -C, --container=PATH Monitor the container at specified cgroup path\n");
exit(0);
}
@@ -276,7 +303,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
memset(&nladdr, 0, sizeof(nladdr));
nladdr.nl_family = AF_NETLINK;
while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
sizeof(nladdr))) < buflen) {
sizeof(nladdr))) < buflen) {
if (r > 0) {
buf += r;
buflen -= r;
@@ -320,6 +347,89 @@ static int get_family_id(int sd)
return id;
}
/* Destination fields for one PSI record ("some" or "full") of one resource. */
struct psi_fields {
	double *avg10;
	double *avg60;
	double *avg300;
	unsigned long long *total;
};

/*
 * Try to parse @line as a PSI record of the given @kind ("some" or "full"):
 *
 *   <kind> avg10=<float> avg60=<float> avg300=<float> total=<integer>
 *
 * The values are stored through @dst. If the line starts with @kind but
 * does not yield all four values, a diagnostic naming @resource and @kind
 * is written to stderr; fields converted before the failure keep the
 * parsed value.
 *
 * Returns 1 if the line starts with @kind (parsed or not), 0 otherwise.
 */
static int psi_parse_record(const char *line, const char *kind,
			    const char *resource,
			    const struct psi_fields *dst)
{
	size_t kind_len = strlen(kind);
	int ret;

	if (strncmp(line, kind, kind_len) != 0)
		return 0;

	/* The leading blank in the format skips the separator after @kind. */
	ret = sscanf(line + kind_len,
		     " avg10=%lf avg60=%lf avg300=%lf total=%llu",
		     dst->avg10, dst->avg60, dst->avg300, dst->total);
	if (ret != 4)
		fprintf(stderr, "Failed to parse %s %s PSI data\n",
			resource, kind);

	return 1;
}

/*
 * Read one PSI file and fill the "some" and/or "full" destinations.
 *
 * @some or @full may be NULL, in which case lines of that kind are ignored.
 * A file that cannot be opened is skipped silently; the destination fields
 * are then left untouched.
 */
static void psi_read_file(const char *path, const char *resource,
			  const struct psi_fields *some,
			  const struct psi_fields *full)
{
	char line[256];
	FILE *fp;

	fp = fopen(path, "r");
	if (!fp)
		return;

	while (fgets(line, sizeof(line), fp)) {
		if (some && psi_parse_record(line, "some", resource, some))
			continue;
		if (full)
			psi_parse_record(line, "full", resource, full);
	}

	fclose(fp);
}

/*
 * Refresh the global PSI snapshot 'psi' from the /proc/pressure files.
 *
 * All fields are zeroed first, so a resource whose file is missing or
 * whose record is absent reads as 0. Parse failures are reported on
 * stderr, each against the record that actually failed, and do not abort
 * the refresh.
 */
static void read_psi_stats(void)
{
	const struct psi_fields cpu_some = {
		&psi.cpu_some_avg10, &psi.cpu_some_avg60,
		&psi.cpu_some_avg300, &psi.cpu_some_total,
	};
	const struct psi_fields cpu_full = {
		&psi.cpu_full_avg10, &psi.cpu_full_avg60,
		&psi.cpu_full_avg300, &psi.cpu_full_total,
	};
	const struct psi_fields memory_some = {
		&psi.memory_some_avg10, &psi.memory_some_avg60,
		&psi.memory_some_avg300, &psi.memory_some_total,
	};
	const struct psi_fields memory_full = {
		&psi.memory_full_avg10, &psi.memory_full_avg60,
		&psi.memory_full_avg300, &psi.memory_full_total,
	};
	const struct psi_fields io_some = {
		&psi.io_some_avg10, &psi.io_some_avg60,
		&psi.io_some_avg300, &psi.io_some_total,
	};
	const struct psi_fields io_full = {
		&psi.io_full_avg10, &psi.io_full_avg60,
		&psi.io_full_avg300, &psi.io_full_total,
	};
	const struct psi_fields irq_full = {
		&psi.irq_full_avg10, &psi.irq_full_avg60,
		&psi.irq_full_avg300, &psi.irq_full_total,
	};

	/* Zero all fields */
	memset(&psi, 0, sizeof(psi));

	/* The "some" and "full" records of a resource live in the same file. */
	psi_read_file(PSI_CPU_SOME, "CPU", &cpu_some, &cpu_full);
	psi_read_file(PSI_MEMORY_SOME, "Memory", &memory_some, &memory_full);
	psi_read_file(PSI_IO_SOME, "IO", &io_some, &io_full);

	/* IRQ pressure (only full) */
	psi_read_file(PSI_IRQ_FULL, "IRQ", NULL, &irq_full);
}
static int read_comm(int pid, char *comm_buf, size_t buf_size)
{
char path[64];
@@ -549,7 +659,29 @@ static void display_results(void)
FILE *out = stdout;
fprintf(out, "\033[H\033[J");
/* PSI output (one-line, no cat style) */
fprintf(out, "System Pressure Information: ");
fprintf(out, "(avg10/avg60/avg300/total)\n");
fprintf(out, "CPU:");
fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10,
psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total);
fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10,
psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total);
fprintf(out, "Memory:");
fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10,
psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total);
fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10,
psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total);
fprintf(out, "IO:");
fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10,
psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total);
fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10,
psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total);
fprintf(out, "IRQ:");
fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10,
psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total);
if (cfg.container_path) {
fprintf(out, "Container Information (%s):\n", cfg.container_path);
fprintf(out, "Processes: running=%d, sleeping=%d, ",
@@ -559,8 +691,8 @@ static void display_results(void)
container_stats.nr_io_wait);
}
fprintf(out, "Top %d processes (sorted by CPU delay):\n\n",
cfg.max_processes);
fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) ");
cfg.max_processes);
fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) ");
fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n");
fprintf(out, "-----------------------------------------------");
fprintf(out, "----------------------------------------------\n");
@@ -616,6 +748,9 @@ int main(int argc, char **argv)
/* Main loop */
while (running) {
/* Read PSI statistics */
read_psi_stats();
/* Get container stats if container path provided */
if (cfg.container_path)
get_container_stats();