From 5104ffb229c357d9672344126040721e5dc4cc7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Feb 2016 10:14:50 -0300 Subject: [PATCH 01/19] perf tools: Use asprintf() for simple string formatting/allocation No need to use strbuf there, its just a simple alloc+formatting, which asprintf does just fine. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6q6cxfhk8c8ypg3tfpo0i2iy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 144047c396f0..f6321194937f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -454,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv) static void execv_dashed_external(const char **argv) { - struct strbuf cmd = STRBUF_INIT; + char *cmd; const char *tmp; int status; - strbuf_addf(&cmd, "perf-%s", argv[0]); + if (asprintf(&cmd, "perf-%s", argv[0]) < 0) + goto do_die; /* * argv[0] must be the perf command, but the argv array @@ -467,7 +468,7 @@ static void execv_dashed_external(const char **argv) * restore it on error. */ tmp = argv[0]; - argv[0] = cmd.buf; + argv[0] = cmd; /* * if we fail because the command is not found, it is @@ -475,15 +476,16 @@ static void execv_dashed_external(const char **argv) */ status = run_command_v_opt(argv, 0); if (status != -ERR_RUN_COMMAND_EXEC) { - if (IS_RUN_COMMAND_ERR(status)) + if (IS_RUN_COMMAND_ERR(status)) { +do_die: die("unable to run '%s'", argv[0]); + } exit(-status); } errno = ENOENT; /* as if we called execvp */ argv[0] = tmp; - - strbuf_release(&cmd); + zfree(&cmd); } static int run_argv(int *argcp, const char ***argv) From 7e9551bc72201838ebaf388224ff43ddf2b0d853 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 25 Feb 2016 06:27:36 +0100 Subject: [PATCH 02/19] perf jvmti: improve error message in Makefile This patch improves the error message given by jvmti Makefile when the alternatives command cannot be found. It now suggests the user locates the root of their Java installation and pass it with JDIR= Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456378056-18812-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile index 0277a64b391b..5ce61a1bda9c 100644 --- a/tools/perf/jvmti/Makefile +++ b/tools/perf/jvmti/Makefile @@ -35,12 +35,21 @@ SOLIBEXT=so # The following works at least on fedora 23, you may need the next # line for other distros. -ifeq (,$(wildcard /usr/sbin/update-java-alternatives)) -JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') -else +ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif endif - +ifndef JDIR +$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) +else + ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) + $(error the openjdk development package appears to me missing, install and try again) + endif +endif +$(info Using Java from $(JDIR)) # -lrt required in 32-bit mode for clock_gettime() LIBS=-lelf -lrt INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux From bb109acc4adeae425147ca87b84d312ea40f24f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Feb 2016 10:56:21 -0300 Subject: [PATCH 03/19] perf tools: Fix parsing of pmu events with empty list of modifiers In 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") I removed the unconditional "'/' '/'" for pmu events such as "intel_pt//" but forgot to use opt_event_config where it expected some event_config, oops. Fix it. Noticed when trying to use: # perf record -e intel_pt// -a sleep 1 event syntax error: 'intel_pt//' \___ parser error Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events # Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d1fbcabbe70d..85c44ba79cad 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -217,14 +217,14 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events_terms__delete($3); + ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + parse_events_terms__delete($2); $$ = list; } | From 8579aca3f9a8f890d6d94ccaed7cf5fd54a0c3bd Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 00:12:59 +0900 Subject: [PATCH 04/19] perf script: Exception handling when the print fmt is empty After collecting samples for events 'syscalls:', perf-script with python script doesn't occasionally work generating a segmentation fault. The reason is that the print fmt is empty and a value of event->print_fmt.args is NULL, so dereferencing the null pointer results in a segmentation fault i.e.: # perf record -e syscalls:* # perf script -g python # perf script -s perf-script.py in trace_begin syscalls__sys_enter_brk 3 79841.832099154 3777 test.sh syscall_nr=12, brk=0 ... (omitted) ... Segmentation fault (core dumped) For example, a format of sys_enter_getuid() hasn't print fmt as below. # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_getuid/format name: sys_enter_getuid ID: 188 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int syscall_nr; offset:8; size:4; signed:1; print fmt: "" So add exception handling to avoid this problem. Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1456413179-12331-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 3 +++ tools/perf/util/scripting-engines/trace-event-python.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 544509c159ce..b3aabc0d4eb0 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d72fafc1c800..309d90fa7698 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -205,6 +205,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; From 8560bae02a948876b26d1d86423cf5e0bb04a815 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 00:13:10 +0900 Subject: [PATCH 05/19] perf script: Remove duplicated code and needless script_spec__findnew() script_spec_register() called two functions: script_spec__find() and script_spec__findnew(). But this way script_spec__find() gets called two times, directly and via script_spec__findnew(). So remove script_spec__findnew() and make script_spec_register() only call once script_spec__find(). Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1456413190-12378-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ec4fbd410a4b..57f9a7e7f7d3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1212,23 +1212,6 @@ static struct script_spec *script_spec__find(const char *spec) return NULL; } -static struct script_spec *script_spec__findnew(const char *spec, - struct scripting_ops *ops) -{ - struct script_spec *s = script_spec__find(spec); - - if (s) - return s; - - s = script_spec__new(spec, ops); - if (!s) - return NULL; - - script_spec__add(s); - - return s; -} - int script_spec_register(const char *spec, struct scripting_ops *ops) { struct script_spec *s; @@ -1237,9 +1220,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops) if (s) return -1; - s = script_spec__findnew(spec, ops); + s = script_spec__new(spec, ops); if (!s) return -1; + else + script_spec__add(s); return 0; } From a7b5895b91fb97f2b0dcc2e3ce47413c18d19ca5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:16 +0900 Subject: [PATCH 06/19] perf hists: Add more helper functions for the hierarchy mode The hists__overhead_width() is to calculate width occupied by the overhead (and others) columns before the sort columns. The hist_entry__has_hiearchy_children() is to check whether an entry has lower entries (children) in the hierarchy to be shown in the output. This means the children should not be filtered out and above the percent limit. These two functions will be used to show information when all children of an entry is omitted by the percent limit (or filter). Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 22 ++++++++++++++++++++++ tools/perf/util/hist.c | 25 +++++++++++++++++++++++++ tools/perf/util/hist.h | 3 +++ 3 files changed, 50 insertions(+) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index edbf854e8e1c..7c0585c146e1 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -643,6 +643,28 @@ unsigned int hists__sort_list_width(struct hists *hists) return ret; } +unsigned int hists__overhead_width(struct hists *hists) +{ + struct perf_hpp_fmt *fmt; + int ret = 0; + bool first = true; + struct perf_hpp dummy_hpp; + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + if (first) + first = false; + else + ret += 2; + + ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); + } + + return ret; +} + void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) { if (perf_hpp__is_sort_entry(fmt)) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1c530428e087..e71691977a95 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1582,6 +1582,31 @@ struct rb_node *rb_hierarchy_prev(struct rb_node *node) return &he->rb_node; } +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) +{ + struct rb_node *node; + struct hist_entry *child; + float percent; + + if (he->leaf) + return false; + + node = rb_first(&he->hroot_out); + child = rb_entry(node, struct hist_entry, rb_node); + + while (node && child->filtered) { + node = rb_next(node); + child = rb_entry(node, struct hist_entry, rb_node); + } + + if (node) + percent = hist_entry__get_percent_limit(child); + else + percent = 0; + + return node && percent >= limit; +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 722aa447f705..da3e7b6e4615 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -410,6 +410,7 @@ static inline int script_browse(const char *script_opt __maybe_unused) #endif unsigned int hists__sort_list_width(struct hists *hists); +unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); @@ -439,4 +440,6 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) #define HIERARCHY_INDENT 3 +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); + #endif /* __PERF_HIST_H */ From bd4abd39db92225dde8335c37d6f4efb319f9cf2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:17 +0900 Subject: [PATCH 07/19] perf report: Show message for percent limit on stdio When the hierarchy mode is used, some entries might be omiited due to a percent limit or filter. In this case the output hierarchy is different than other entries. Add an informative message to users about this. For example, when 4% of percent limit is applied: Before: # Overhead Command / Shared Object / Symbol # .............. .......................................... # 49.09% swapper 48.67% [kernel.vmlinux] 34.42% [k] intel_idle 11.51% firefox 8.87% libpthread-2.22.so 6.60% [.] __GI___libc_recvmsg 10.49% gnome-shell 4.74% libc-2.22.so 10.08% Xorg 6.11% libc-2.22.so 5.27% [.] __memcpy_sse2_unaligned 6.15% perf Note that, gnome-shell/libc has no symbols and perf has no dso/symbols. With that patch the output will look like below: After: # Overhead Command / Shared Object / Symbol # .............. .......................................... # 49.09% swapper 48.67% [kernel.vmlinux] 34.42% [k] intel_idle 11.51% firefox 8.87% libpthread-2.22.so 6.60% [.] __GI___libc_recvmsg 10.49% gnome-shell 4.74% libc-2.22.so no entry >= 4.00% 10.08% Xorg 6.11% libc-2.22.so 5.27% [.] __memcpy_sse2_unaligned 6.15% perf no entry >= 4.00% Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 435eaaaf2f1d..b3bdfcb245f9 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -628,6 +628,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, bool first = true; size_t linesz; char *line = NULL; + unsigned indent; init_rem_hits(); @@ -704,6 +705,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, goto out; } + indent = hists__overhead_width(hists) + 4; + for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; @@ -720,6 +723,20 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, if (max_rows && ++nr_rows >= max_rows) break; + /* + * If all children are filtered out or percent-limited, + * display "no entry >= x.xx%" message. + */ + if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { + int nr_sort = hists->hpp_list->nr_sort_keys; + + print_hierarchy_indent(sep, nr_sort + h->depth + 1, spaces, fp); + fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); + + if (max_rows && ++nr_rows >= max_rows) + break; + } + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(h->thread->mg, MAP__FUNCTION, fp); From 201fde73b111e7c31fdc0e9fa6bc4b73dfef699d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:18 +0900 Subject: [PATCH 08/19] perf hists browser: Cleanup hist_browser__update_percent_limit() The previous patch introduced __rb_hierarchy_next() function with various move direction like HMD_FORCE_CHILD but missed to change using it some place. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 6bcd7670ce5f..904eaa719eb3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2477,12 +2477,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, min_callchain_hits, &callchain_param); next: - /* - * Tentatively set unfolded so that the rb_hierarchy_next() - * can toggle children of folded entries too. - */ - he->unfolded = he->has_children; - nd = rb_hierarchy_next(nd); + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); /* force to re-evaluate folding state of callchains */ he->init_have_children = false; From 79dded8776c2dc4d6e1229de69f4027e84d63673 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:19 +0900 Subject: [PATCH 09/19] perf hists browser: Show message for percent limit Like the stdio, it should show messages about omitted hierarchy entries. Please refer the previous commit for more details. As it needs to check an entry is omitted or not multiple times, add the has_no_entry field in the hist entry. Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 99 ++++++++++++++++++++++++++++++++++ tools/perf/util/hist.c | 2 + tools/perf/util/sort.h | 1 + 3 files changed, 102 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 904eaa719eb3..71c6d510390f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -260,6 +260,9 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, if (he->leaf) return callchain__count_rows(&he->sorted_chain); + if (he->has_no_entry) + return 1; + node = rb_first(&he->hroot_out); while (node) { float percent; @@ -409,10 +412,18 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) /* account grand children */ if (symbol_conf.report_hierarchy) browser->b.nr_entries += child_rows - he->nr_rows; + + if (!he->leaf && he->nr_rows == 0) { + he->has_no_entry = true; + he->nr_rows = 1; + } } else { if (symbol_conf.report_hierarchy) browser->b.nr_entries -= child_rows - he->nr_rows; + if (he->has_no_entry) + he->has_no_entry = false; + he->nr_rows = 0; } @@ -545,6 +556,12 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) browser->nr_hierarchy_entries++; if (he->leaf) browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } @@ -1412,6 +1429,75 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, return printed; } +static int hist_browser__show_no_entry(struct hist_browser *browser, + unsigned short row, + int level, int nr_sort_keys) +{ + int width = browser->b.width; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool first = true; + int column = 0; + int ret; + struct perf_hpp_fmt *fmt; + + if (current_entry) { + browser->he_selection = NULL; + browser->selection = NULL; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + hists__for_each_format(browser->hists, fmt) { + if (perf_hpp__should_skip(fmt, browser->hists) || + column++ < browser->b.horiz_scroll) + continue; + + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); + + if (first) { + /* for folded sign */ + first = false; + ret++; + } else { + /* space between columns */ + ret += 2; + } + + ui_browser__write_nstring(&browser->b, "", ret); + width -= ret; + } + + ui_browser__write_nstring(&browser->b, "", nr_sort_keys * HIERARCHY_INDENT); + width -= nr_sort_keys * HIERARCHY_INDENT; + + if (column >= browser->b.horiz_scroll) { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt); + ui_browser__printf(&browser->b, " %s", buf); + width -= ret + 2; + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + return 1; +} + static int advance_hpp_check(struct perf_hpp *hpp, int inc) { advance_hpp(hpp, inc); @@ -1575,6 +1661,14 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) row += hist_browser__show_hierarchy_entry(hb, h, row, h->depth, nr_sort); + if (row == browser->rows) + break; + + if (h->has_no_entry) { + hist_browser__show_no_entry(hb, row, h->depth, + nr_sort); + row++; + } } else { row += hist_browser__show_entry(hb, h, row); } @@ -2461,6 +2555,11 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { he = rb_entry(nd, struct hist_entry, rb_node); + if (he->has_no_entry) { + he->has_no_entry = false; + he->nr_rows = 0; + } + if (!he->leaf || !symbol_conf.use_callchain) goto next; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e71691977a95..75dc41d2dca9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1625,6 +1625,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ parent->unfolded = false; + parent->has_no_entry = false; parent->row_offset = 0; parent->nr_rows = 0; next: @@ -1637,6 +1638,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ h->unfolded = false; + h->has_no_entry = false; h->row_offset = 0; h->nr_rows = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index a8d53ffe0916..25a5529a94e4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -117,6 +117,7 @@ struct hist_entry { bool init_have_children; bool unfolded; bool has_children; + bool has_no_entry; }; }; char *srcline; From 2ddda792373065577eb9207370cb7a28395caffa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:20 +0900 Subject: [PATCH 10/19] perf report: Show message for percent limit on gtk Like the stdio, it should show messages about omitted hierarchy entries. Please refer the previous commit for more details. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/hists.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 7f343339eae7..a5758fdfbe1f 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -449,6 +449,17 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, store, &iter, hpp, min_pcnt); + + if (!hist_entry__has_hierarchy_children(he, min_pcnt)) { + char buf[32]; + GtkTreeIter child; + + snprintf(buf, sizeof(buf), "no entry >= %.2f%%", + min_pcnt); + + gtk_tree_store_append(store, &child, &iter); + gtk_tree_store_set(store, &child, col_idx, buf, -1); + } } if (symbol_conf.use_callchain && he->leaf) { From 84b6ee8ea36ff797afa13c297a86ed0144482bee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:43 +0900 Subject: [PATCH 11/19] perf hists: Fix comparing of dynamic entries When hist_entry__cmp() and hist_entry__collapse() are called, they should check if the dynamic entry is comparing matching hists only. Otherwise it might access different hists resulting in incorrect output. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 75dc41d2dca9..cc849d326211 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1002,6 +1002,10 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t cmp = 0; hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -1018,6 +1022,10 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) int64_t cmp = 0; hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->collapse(fmt, left, right); if (cmp) break; From d3a72fd8187b7fa0014394c9dec95ba349b3301e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:44 +0900 Subject: [PATCH 12/19] perf report: Fix indentation of dynamic entries in hierarchy When dynamic entries are used in the hierarchy mode with multiple events, the output might not be aligned properly. In the hierarchy mode, the each sort column is indented using total number of sort keys. So it keeps track of number of sort keys when adding them. However a dynamic sort key can be added more than once when multiple events have same field names. This results in unnecessarily long indentation in the output. For example perf kmem records following events: $ perf evlist --trace-fields -i perf.data.kmem kmem:kmalloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmalloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kfree: trace_fields: call_site,ptr kmem:kmem_cache_alloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmem_cache_alloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kmem_cache_free: trace_fields: call_site,ptr kmem:mm_page_alloc: trace_fields: page,order,gfp_flags,migratetype kmem:mm_page_free: trace_fields: page,order As you can see, many field names shared between kmem events. So adding 'ptr' dynamic sort key alone will set nr_sort_keys to 6. And this adds many unnecessary spaces between columns. Before: $ perf report -i perf.data.kmem --hierarchy -s ptr -g none --stdio ... # Overhead ptr # ....................... ................................... # 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 After: # Overhead ptr # ........ .................... # 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +++--- tools/perf/ui/stdio/hist.c | 6 +++--- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 71c6d510390f..5f74c6723c53 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1547,7 +1547,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows struct perf_hpp_fmt *fmt; size_t ret = 0; int column = 0; - int nr_sort_keys = hists->hpp_list->nr_sort_keys; + int nr_sort_keys = hists->nr_sort_keys; bool first = true; ret = scnprintf(buf, size, " "); @@ -1632,7 +1632,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); - int nr_sort = hb->hists->hpp_list->nr_sort_keys; + int nr_sort = hb->hists->nr_sort_keys; if (hb->show_headers) { hist_browser__show_headers(hb); @@ -1969,7 +1969,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), browser->min_pcnt); int printed = 0; - int nr_sort = browser->hists->hpp_list->nr_sort_keys; + int nr_sort = browser->hists->nr_sort_keys; while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index b3bdfcb245f9..5733d6c196be 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -495,7 +495,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, size = hpp.size = bfsz; if (symbol_conf.report_hierarchy) { - int nr_sort = hists->hpp_list->nr_sort_keys; + int nr_sort = hists->nr_sort_keys; return hist_entry__hierarchy_fprintf(he, &hpp, nr_sort, hists, fp); @@ -529,7 +529,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, unsigned header_width = 0; struct perf_hpp_fmt *fmt; - nr_sort = hists->hpp_list->nr_sort_keys; + nr_sort = hists->nr_sort_keys; /* preserve max indent depth for column headers */ print_hierarchy_indent(sep, nr_sort, spaces, fp); @@ -728,7 +728,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, * display "no entry >= x.xx%" message. */ if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { - int nr_sort = hists->hpp_list->nr_sort_keys; + int nr_sort = hists->nr_sort_keys; print_hierarchy_indent(sep, nr_sort + h->depth + 1, spaces, fp); fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index da3e7b6e4615..da5e50586bfd 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -78,6 +78,7 @@ struct hists { u16 col_len[HISTC_NR_COLS]; int socket_filter; struct perf_hpp_list *hpp_list; + int nr_sort_keys; }; struct hist_entry_iter; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6bee8bdfb91b..2beb7a6360a4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2633,6 +2633,9 @@ static int __setup_output_field(void) int setup_sorting(struct perf_evlist *evlist) { int err; + struct hists *hists; + struct perf_evsel *evsel; + struct perf_hpp_fmt *fmt; err = __setup_sorting(evlist); if (err < 0) @@ -2644,6 +2647,22 @@ int setup_sorting(struct perf_evlist *evlist) return err; } + evlist__for_each(evlist, evsel) { + hists = evsel__hists(evsel); + hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; + + /* + * If dynamic entries were used, it might add multiple + * entries to each evsel for a single field name. Set + * actual number of sort keys for each hists. + */ + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + hists->nr_sort_keys--; + } + } + reset_dimensions(); /* From cb1fab917206f822d1f905cbc45971eefdef361d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:45 +0900 Subject: [PATCH 13/19] perf report: Left align dynamic entries in hierarchy The dynamic entries are right-aligned unlike other entries since it usually has numeric value. But for the hierarchy mode, left alignment is more appropriate IMHO. Also trim spaces on the left so that we can easily identify the hierarchy. Before: $ perf report --hierarchy -i perf.data.kmem -s gfp_flags,ptr,bytes_req --stdio -g none ... # # Overhead gfp_flags / ptr / bytes_req # .............. ................................................................................................. # 91.67% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 37.50% 0xffff8803f7669400 37.50% 448 8.33% 0xffff8803f766be00 8.33% 96 4.17% 0xffff8800d156dc00 4.17% 704 After: # Overhead gfp_flags / ptr / bytes_req # .............. .................................... # 91.67% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 37.50% 0xffff8803f7669400 37.50% 448 8.33% 0xffff8803f766be00 8.33% 96 4.17% 0xffff8800d156dc00 4.17% 704 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 16 ++++++++++++++-- tools/perf/ui/stdio/hist.c | 28 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5f74c6723c53..5ffffcb1e3c5 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1400,8 +1400,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, if (fmt->color) { width -= fmt->color(fmt, &hpp, entry); } else { + int i = 0; + width -= fmt->entry(fmt, &hpp, entry); - ui_browser__printf(&browser->b, "%s", s); + ui_browser__printf(&browser->b, "%s", ltrim(s)); + + while (isspace(s[i++])) + width++; } } @@ -1576,6 +1581,8 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows return ret; hists__for_each_format(hists, fmt) { + char *start; + if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) continue; if (perf_hpp__should_skip(fmt, hists)) @@ -1593,7 +1600,12 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows dummy_hpp.buf[ret] = '\0'; rtrim(dummy_hpp.buf); - ret = strlen(dummy_hpp.buf); + start = ltrim(dummy_hpp.buf); + ret = strlen(start); + + if (start != dummy_hpp.buf) + memmove(dummy_hpp.buf, start, ret + 1); + if (advance_hpp_check(&dummy_hpp, ret)) break; } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5733d6c196be..6d06fbb365b6 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -418,6 +418,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; char *buf = hpp->buf; + size_t size = hpp->size; int ret, printed = 0; bool first = true; @@ -457,6 +458,11 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, (nr_sort_key - 1) * HIERARCHY_INDENT + 2, ""); advance_hpp(hpp, ret); + printed += fprintf(fp, "%s", buf); + + hpp->buf = buf; + hpp->size = size; + /* * No need to call hist_entry__snprintf_alignment() since this * fmt is always the last column in the hierarchy mode. @@ -467,7 +473,11 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, else fmt->entry(fmt, hpp, he); - printed += fprintf(fp, "%s\n", buf); + /* + * dynamic entries are right-aligned but we want left-aligned + * in the hierarchy mode + */ + printed += fprintf(fp, "%s\n", ltrim(buf)); if (symbol_conf.use_callchain && he->leaf) { u64 total = hists__total_period(hists); @@ -525,6 +535,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, { bool first = true; int nr_sort; + int depth; unsigned width = 0; unsigned header_width = 0; struct perf_hpp_fmt *fmt; @@ -558,19 +569,16 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, if (!first) header_width += fprintf(fp, " / "); else { - header_width += fprintf(fp, "%s", sep ?: " "); + fprintf(fp, "%s", sep ?: " "); first = false; } fmt->header(fmt, hpp, hists_to_evsel(hists)); rtrim(hpp->buf); - header_width += fprintf(fp, "%s", hpp->buf); + header_width += fprintf(fp, "%s", ltrim(hpp->buf)); } - /* preserve max indent depth for combined sort headers */ - print_hierarchy_indent(sep, nr_sort, spaces, fp); - fprintf(fp, "\n# "); /* preserve max indent depth for initial dots */ @@ -590,6 +598,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, fprintf(fp, "%.*s", width, dots); } + depth = 0; hists__for_each_format(hists, fmt) { if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) continue; @@ -597,15 +606,16 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, continue; width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + width += depth * HIERARCHY_INDENT; + if (width > header_width) header_width = width; + + depth++; } fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots); - /* preserve max indent depth for dots under sort headers */ - print_hierarchy_indent(sep, nr_sort, dots, fp); - fprintf(fp, "\n#\n"); return 2; From e049d4a3fa194c8aa0d3ca29a9b11b32387ca6e3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:46 +0900 Subject: [PATCH 14/19] perf hists: Fix dynamic entry display in hierarchy When dynamic sort key is used it might not show pretty printed output. This is because the trace output was not set only for the first dynamic sort key. During hierarchy_insert_entry() it missed to pass the trace_output to dynamic entries. Also even if it did, only first entry will have it. Subsequent entries might set it during collapsing stage but it's not guaranteed. Before: $ perf report --hierarchy --stdio -s ptr,bytes_req,gfp_flags -g none # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% 66080 10.42% 0xffff8803f766be00 8.33% 96 8.33% 66080 2.08% 512 2.08% 67280 After: # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 2 +- tools/perf/util/sort.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cc849d326211..9b3f582867d6 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1125,7 +1125,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->fmt = fmt; /* some fields are now passed to 'new' */ - if (perf_hpp__is_trace_entry(fmt)) + if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) he->trace_output = NULL; else new->trace_output = NULL; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2beb7a6360a4..d26c6b9fe348 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1764,6 +1764,9 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, if (hde->raw_trace) goto raw_field; + if (!he->trace_output) + he->trace_output = get_trace_output(he); + field = hde->field; namelen = strlen(field->name); str = he->trace_output; From abab5e7fcec16e526968f8a5448cd81c635705ce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:47 +0900 Subject: [PATCH 15/19] perf report: Update column width of dynamic entries The column width of dynamic entries is updated when comparing hist entries. However some unique entries can miss the chance to update. So move the update to output resort stage to make sure every entry will get called before display. To do that, abuse ->sort callback to update the width when the third argument is NULL. When resorting entries in normal path, it never be NULL so it should be fine IMHO. Before: # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP <-- here After: # Overhead ptr / bytes_req / gfp_flags # .............. ..................................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP_NOMEMALLOC Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 11 +++++++++++ tools/perf/util/sort.c | 8 +++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9b3f582867d6..4b8b67bc0cd8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1371,6 +1371,10 @@ static void hierarchy_insert_output_entry(struct rb_root *root, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, root); + + /* update column width of dynamic entry */ + if (perf_hpp__is_dynamic_entry(he->fmt)) + he->fmt->sort(he->fmt, he, NULL); } static void hists__hierarchy_output_resort(struct hists *hists, @@ -1440,6 +1444,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; + struct perf_hpp_fmt *fmt; if (use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -1466,6 +1471,12 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, entries); + + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + perf_hpp__defined_dynamic_entry(fmt, he->hists)) + fmt->sort(fmt, he, NULL); /* update column width */ + } } static void output_resort(struct hists *hists, struct ui_progress *prog, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d26c6b9fe348..5888bfe9a193 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1816,6 +1816,11 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + if (b == NULL) { + update_dynamic_len(hde, a); + return 0; + } + field = hde->field; if (field->flags & FIELD_IS_DYNAMIC) { unsigned long long dyn; @@ -1830,9 +1835,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, } else { offset = field->offset; size = field->size; - - update_dynamic_len(hde, a); - update_dynamic_len(hde, b); } return memcmp(a->raw_data + offset, b->raw_data + offset, size); From b8cbb349061edda648463b086cfa869a7ab583af Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:51 +0000 Subject: [PATCH 16/19] perf config: Bring perf_default_config to the very beginning at main() Before this patch each subcommand calls perf_config() by themself, reading the default configuration together with subcommand specific options. If a subcommand doesn't have it own options, it needs to call 'perf_config(perf_default_config, NULL)' to ensure .perfconfig is loaded. This patch brings perf_config(perf_default_config, NULL) to the very start of main(), so subcommands don't need to do it. After this patch, 'llvm.clang-path' works for 'perf trace'. Signed-off-by: Wang Nan Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 -- tools/perf/builtin-help.c | 2 +- tools/perf/builtin-kmem.c | 4 ++-- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/perf.c | 2 ++ tools/perf/tests/llvm.c | 8 -------- tools/perf/util/color.c | 5 +++-- tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/help-unknown-cmd.c | 5 +++-- 10 files changed, 15 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 36ccc2b8827f..4d72359fd15a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1264,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (ret < 0) return ret; - perf_config(perf_default_config, NULL); - argc = parse_options(argc, argv, options, diff_usage, 0); if (symbol__init(NULL) < 0) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index f4dd2b48f90f..49d55e21b1b0 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -272,7 +272,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "man.")) return add_man_viewer_info(var, value); - return perf_default_config(var, value, cb); + return 0; } static struct cmdnames main_cmds, other_cmds; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 118010553d0c..4d3340cce9a0 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1834,7 +1834,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int kmem_config(const char *var, const char *value, void *cb) +static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "kmem.default")) { if (!strcmp(value, "slab")) @@ -1847,7 +1847,7 @@ static int kmem_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f4d8244449ca..7eea49f9ed46 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -90,7 +90,7 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int hist_iter__report_callback(struct hist_entry_iter *iter, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b86b623e8799..94af190f6843 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1065,7 +1065,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_top_opt(arg); } -static int perf_top_config(const char *var, const char *value, void *cb) +static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "top.call-graph")) var = "call-graph.record-mode"; /* fall-through */ @@ -1074,7 +1074,7 @@ static int perf_top_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f6321194937f..aaee0a782747 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -548,6 +548,8 @@ int main(int argc, const char **argv) srandom(time(NULL)); + perf_config(perf_default_config, NULL); + /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 70edcdfa5672..cff564fb4b66 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -6,12 +6,6 @@ #include "tests.h" #include "debug.h" -static int perf_config_cb(const char *var, const char *val, - void *arg __maybe_unused) -{ - return perf_default_config(var, val, arg); -} - #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { @@ -77,8 +71,6 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, if (should_load_fail) *should_load_fail = bpf_source_table[idx].should_load_fail; - perf_config(perf_config_cb, NULL); - /* * Skip this test if user's .perfconfig doesn't set [llvm] section * and clang is not found in $PATH, and this is not perf test -v diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e5fb88bab9e1..43e84aa27e4a 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) return 0; } -int perf_color_default_config(const char *var, const char *value, void *cb) +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "color.ui")) { perf_use_color_default = perf_config_colorbool(var, value, -1); return 0; } - return perf_default_config(var, value, cb); + return 0; } static int __color_vsnprintf(char *bf, size_t size, const char *color, diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index b722e57d5a87..6729f4d9df7c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1117,7 +1117,7 @@ static int convert__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int bt_convert__perf2ctf(const char *input, const char *path, bool force) diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index dc1e41c9b054..43a98a4dc1e1 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -6,7 +6,8 @@ static int autocorrect; static struct cmdnames aliases; -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) +static int perf_unknown_cmd_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); @@ -14,7 +15,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "alias.")) add_cmdname(&aliases, var + 6, strlen(var + 6)); - return perf_default_config(var, value, cb); + return 0; } static int levenshtein_compare(const void *p1, const void *p2) From fdf14720fbd02d406ac2c1c50444774b4c7eed9a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:53 +0000 Subject: [PATCH 17/19] perf tools: Only set filter for tracepoints events perf_evlist__set_filter() tries to set filter to every evsel linked in the evlist. However, since filters can only be applied to tracepoints, checking type of evsel before calling perf_evsel__set_filter() would be better. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c42e1967e970..86a03836a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1223,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) int err = 0; evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + err = perf_evsel__set_filter(evsel, filter); if (err) break; From ba50423530200659d4deb703a8f72d3b69bc13ce Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:54 +0000 Subject: [PATCH 18/19] perf trace: Call bpf__apply_obj_config in 'perf trace' Without this patch BPF map configuration is not applied. Command like this: # ./perf trace --ev bpf-output/no-inherit,name=evt/ \ --ev ./test_bpf_trace.c/map:channel.event=evt/ \ usleep 100000 Load BPF files without error, but since map:channel.event=evt is not applied, bpf-output event not work. This patch allows 'perf trace' load and run BPF scripts. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 20916dd77aac..254149ca5e1b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -33,6 +33,7 @@ #include "util/stat.h" #include "trace-event.h" #include "util/parse-events.h" +#include "util/bpf-loader.h" #include #include @@ -2586,6 +2587,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_error_open; + } + /* * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a From 1d6c9407d45dd622b277ca9f725da3cc9e95b5de Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:55 +0000 Subject: [PATCH 19/19] perf trace: Print content of bpf-output event With this patch the contend of BPF output event is printed by 'perf trace'. For example: # ./perf trace -a --ev bpf-output/no-inherit,name=evt/ \ --ev ./test_bpf_trace.c/map:channel.event=evt/ \ usleep 100000 ... 1.787 ( 0.004 ms): usleep/3832 nanosleep(rqtp: 0x7ffc78b18980 ) ... 1.787 ( ): evt:Raise a BPF event!..) 1.788 ( ): perf_bpf_probe:func_begin:(ffffffff810e97d0)) ... 101.866 (87.038 ms): gmain/1654 poll(ufds: 0x7f57a80008c0, nfds: 2, timeout_msecs: 1000 ) ... 101.866 ( ): evt:Raise a BPF event!..) 101.867 ( ): perf_bpf_probe:func_end:(ffffffff810e97d0 <- ffffffff81796173)) 101.869 (100.087 ms): usleep/3832 ... [continued]: nanosleep()) = 0 ... (There is an extra ')' at the end of several lines. However, it is another problem, unrelated to this commit.) Where test_bpf_trace.c is: /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { char output_str[] = "Raise a BPF event!"; char err_str[] = "BAD %d\n"; int err; err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_str, sizeof(output_str)); if (err) trace_printk(err_str, sizeof(err_str), err); return 1; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-8-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 254149ca5e1b..26a337f939d8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2178,6 +2178,37 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs return 0; } +static void bpf_output__printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + FILE *output = extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_CHAR_DATA: + fprintf(output, "%c", isprint(ch) ? ch : '.'); + break; + case BINARY_PRINT_DATA_BEGIN: + case BINARY_PRINT_LINE_BEGIN: + case BINARY_PRINT_ADDR: + case BINARY_PRINT_NUM_DATA: + case BINARY_PRINT_NUM_PAD: + case BINARY_PRINT_SEP: + case BINARY_PRINT_CHAR_PAD: + case BINARY_PRINT_LINE_END: + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void bpf_output__fprintf(struct trace *trace, + struct perf_sample *sample) +{ + print_binary(sample->raw_data, sample->raw_size, 8, + bpf_output__printer, trace->output); +} + static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2190,7 +2221,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, "%s:", evsel->name); - if (evsel->tp_format) { + if (perf_evsel__is_bpf_output(evsel)) { + bpf_output__fprintf(trace, sample); + } else if (evsel->tp_format) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output);