From 75b49202d87c142a646e37edb466462ea6fbe0fb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Nov 2016 11:16:06 -0300 Subject: [PATCH 01/12] perf annotate: Remove duplicate 'name' field from disasm_line The disasm_line::name field is always equal to ins::name, being used just to locate the instruction's ins_ops from the per-arch instructions table. Eliminate this duplication, nuking that field and instead make ins__find() return an ins_ops, store it in disasm_line::ins.ops, and keep just in disasm_line::ins.name what was in disasm_line::name, this way we end up not keeping a reference to entries in the per-arch instructions table. This in turn will help supporting multiple ways to manage the per-arch instructions table, allowing resorting that array, for instance, when the entries will move after references to its addresses were made. The same problem is avoided when one grows the array with realloc. So architectures simply keeping a constant array will work as well as architectures building the table using regular expressions or other logic that involves resorting the table. Reviewed-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Jiri Olsa Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-vr899azvabnw9gtuepuqfd9t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 18 ++++---- tools/perf/util/annotate.c | 73 ++++++++++++++----------------- tools/perf/util/annotate.h | 17 ++++--- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index e6e9f7d80dbd..cee0eee31ce6 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -213,17 +213,17 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); - if (dl->ins && dl->ins->ops->scnprintf) { - if (ins__is_jump(dl->ins)) { + if (dl->ins.ops && dl->ins.ops->scnprintf) { + if (ins__is_jump(&dl->ins)) { bool fwd = dl->ops.target.offset > (u64)dl->offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); SLsmg_write_char(' '); - } else if (ins__is_call(dl->ins)) { + } else if (ins__is_call(&dl->ins)) { ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); SLsmg_write_char(' '); - } else if (ins__is_ret(dl->ins)) { + } else if (ins__is_ret(&dl->ins)) { ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); SLsmg_write_char(' '); } else { @@ -243,7 +243,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) { - if (!dl || !dl->ins || !ins__is_jump(dl->ins) + if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || !disasm_line__has_offset(dl) || dl->ops.target.offset >= symbol__size(sym)) return false; @@ -492,7 +492,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, }; char title[SYM_TITLE_MAX_SIZE]; - if (!ins__is_call(dl->ins)) + if (!ins__is_call(&dl->ins)) return false; if (map_groups__find_ams(&target) || @@ -545,7 +545,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) struct disasm_line *dl = browser->selection; s64 idx; - if (!ins__is_jump(dl->ins)) + if (!ins__is_jump(&dl->ins)) return false; dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx); @@ -841,9 +841,9 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); else if (browser->selection->offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); - else if (!browser->selection->ins) + else if (!browser->selection->ins.ops) goto show_sup_ins; - else if (ins__is_ret(browser->selection->ins)) + else if (ins__is_ret(&browser->selection->ins)) goto out; else if (!(annotate_browser__jump(browser) || annotate_browser__callq(browser, evsel, hbt))) { diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 095d90a9077f..b48a39be071b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -28,8 +28,8 @@ const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; -static struct ins *ins__find(struct arch *arch, const char *name); -static int disasm_line__parse(char *line, char **namep, char **rawp); +static struct ins_ops *ins__find(struct arch *arch, const char *name); +static int disasm_line__parse(char *line, const char **namep, char **rawp); struct arch { const char *name; @@ -218,26 +218,20 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { - char *name; - ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); if (ops->locked.ops == NULL) return 0; - if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) + if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) goto out_free_ops; - ops->locked.ins = ins__find(arch, name); - free(name); + ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name); - if (ops->locked.ins == NULL) + if (ops->locked.ins.ops == NULL) goto out_free_ops; - if (!ops->locked.ins->ops) - return 0; - - if (ops->locked.ins->ops->parse && - ops->locked.ins->ops->parse(arch, ops->locked.ops, map) < 0) + if (ops->locked.ins.ops->parse && + ops->locked.ins.ops->parse(arch, ops->locked.ops, map) < 0) goto out_free_ops; return 0; @@ -252,19 +246,19 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, { int printed; - if (ops->locked.ins == NULL) + if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); printed = scnprintf(bf, size, "%-6.6s ", ins->name); - return printed + ins__scnprintf(ops->locked.ins, bf + printed, + return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } static void lock__delete(struct ins_operands *ops) { - struct ins *ins = ops->locked.ins; + struct ins *ins = &ops->locked.ins; - if (ins && ins->ops->free) + if (ins->ops && ins->ops->free) ins->ops->free(ops->locked.ops); else ins__delete(ops->locked.ops); @@ -425,8 +419,9 @@ static void ins__sort(struct arch *arch) qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); } -static struct ins *ins__find(struct arch *arch, const char *name) +static struct ins_ops *ins__find(struct arch *arch, const char *name) { + struct ins *ins; const int nmemb = arch->nr_instructions; if (!arch->sorted_instructions) { @@ -434,7 +429,8 @@ static struct ins *ins__find(struct arch *arch, const char *name) arch->sorted_instructions = true; } - return bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + return ins ? ins->ops : NULL; } static int arch__key_cmp(const void *name, const void *archp) @@ -691,19 +687,16 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) { - dl->ins = ins__find(arch, dl->name); + dl->ins.ops = ins__find(arch, dl->ins.name); - if (dl->ins == NULL) + if (!dl->ins.ops) return; - if (!dl->ins->ops) - return; - - if (dl->ins->ops->parse && dl->ins->ops->parse(arch, &dl->ops, map) < 0) - dl->ins = NULL; + if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, map) < 0) + dl->ins.ops = NULL; } -static int disasm_line__parse(char *line, char **namep, char **rawp) +static int disasm_line__parse(char *line, const char **namep, char **rawp) { char *name = line, tmp; @@ -736,7 +729,8 @@ static int disasm_line__parse(char *line, char **namep, char **rawp) return 0; out_free_name: - zfree(namep); + free((void *)namep); + *namep = NULL; return -1; } @@ -755,7 +749,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, goto out_delete; if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) + if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; disasm_line__init_ins(dl, arch, map); @@ -774,20 +768,21 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, void disasm_line__free(struct disasm_line *dl) { zfree(&dl->line); - zfree(&dl->name); - if (dl->ins && dl->ins->ops->free) - dl->ins->ops->free(&dl->ops); + if (dl->ins.ops && dl->ins.ops->free) + dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); + free((void *)dl->ins.name); + dl->ins.name = NULL; free(dl); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { - if (raw || !dl->ins) - return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw); + if (raw || !dl->ins.ops) + return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); - return ins__scnprintf(dl->ins, bf, size, &dl->ops); + return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } static void disasm__add(struct list_head *head, struct disasm_line *line) @@ -1143,7 +1138,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, map__rip_2objdump(map, sym->start); /* kcore has no symbols, so add the call target name */ - if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) { + if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { struct addr_map_symbol target = { .map = map, .addr = dl->ops.target.addr, @@ -1173,8 +1168,8 @@ static void delete_last_nop(struct symbol *sym) while (!list_empty(list)) { dl = list_entry(list->prev, struct disasm_line, node); - if (dl->ins && dl->ins->ops) { - if (dl->ins->ops != &nop_ops) + if (dl->ins.ops) { + if (dl->ins.ops != &nop_ops) return; } else { if (!strstr(dl->line, " nop ") && @@ -1767,7 +1762,7 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) if (dl->offset == -1) return fprintf(fp, "%s\n", dl->line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8e490b5c91bc..87e4cadc5d27 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -11,7 +11,12 @@ #include #include -struct ins; +struct ins_ops; + +struct ins { + const char *name; + struct ins_ops *ops; +}; struct ins_operands { char *raw; @@ -28,7 +33,7 @@ struct ins_operands { u64 addr; } source; struct { - struct ins *ins; + struct ins ins; struct ins_operands *ops; } locked; }; @@ -43,11 +48,6 @@ struct ins_ops { struct ins_operands *ops); }; -struct ins { - const char *name; - struct ins_ops *ops; -}; - bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); bool ins__is_ret(const struct ins *ins); @@ -59,8 +59,7 @@ struct disasm_line { struct list_head node; s64 offset; char *line; - char *name; - struct ins *ins; + struct ins ins; int line_nr; float ipc; u64 cycles; From 2a1ff812c40be982e4dd7a44159462fb25bebdf3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Nov 2016 11:37:08 -0300 Subject: [PATCH 02/12] perf annotate: Introduce alternative method of keeping instructions table Some arches may want to dynamically populate the table using regular expressions on the instruction names to associate them with a set of parsing/formatting/etc functions (struct ins_ops), so provide a fallback for when the ins__find() method fails. That fall back will be able to resize the arch->instructions, setting arch->nr_instructions appropriately, helper functions to associate an ins_ops to an instruction name, growing the arch->instructions if needed and resorting it are provided, all the arch specific callback needs to do is to decide if the missing instruction should be added to arch->instructions with a ins_ops association. Reviewed-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Jiri Olsa Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-auu13yradxf7g5dgtpnzt97a@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 63 +++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b48a39be071b..026915a7dac8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -29,12 +29,15 @@ const char *objdump_path; static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); +static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); struct arch { const char *name; struct ins *instructions; size_t nr_instructions; + size_t nr_instructions_allocated; + struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); bool sorted_instructions; struct { char comment_char; @@ -50,6 +53,54 @@ static struct ins_ops nop_ops; static struct ins_ops lock_ops; static struct ins_ops ret_ops; +static int arch__grow_instructions(struct arch *arch) +{ + struct ins *new_instructions; + size_t new_nr_allocated; + + if (arch->nr_instructions_allocated == 0 && arch->instructions) + goto grow_from_non_allocated_table; + + new_nr_allocated = arch->nr_instructions_allocated + 128; + new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); + if (new_instructions == NULL) + return -1; + +out_update_instructions: + arch->instructions = new_instructions; + arch->nr_instructions_allocated = new_nr_allocated; + return 0; + +grow_from_non_allocated_table: + new_nr_allocated = arch->nr_instructions + 128; + new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); + if (new_instructions == NULL) + return -1; + + memcpy(new_instructions, arch->instructions, arch->nr_instructions); + goto out_update_instructions; +} + +static __maybe_unused int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) +{ + struct ins *ins; + + if (arch->nr_instructions == arch->nr_instructions_allocated && + arch__grow_instructions(arch)) + return -1; + + ins = &arch->instructions[arch->nr_instructions]; + ins->name = strdup(name); + if (!ins->name) + return -1; + + ins->ops = ops; + arch->nr_instructions++; + + ins__sort(arch); + return 0; +} + #include "arch/arm/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" @@ -419,7 +470,7 @@ static void ins__sort(struct arch *arch) qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); } -static struct ins_ops *ins__find(struct arch *arch, const char *name) +static struct ins_ops *__ins__find(struct arch *arch, const char *name) { struct ins *ins; const int nmemb = arch->nr_instructions; @@ -433,6 +484,16 @@ static struct ins_ops *ins__find(struct arch *arch, const char *name) return ins ? ins->ops : NULL; } +static struct ins_ops *ins__find(struct arch *arch, const char *name) +{ + struct ins_ops *ops = __ins__find(arch, name); + + if (!ops && arch->associate_instruction_ops) + ops = arch->associate_instruction_ops(arch, name); + + return ops; +} + static int arch__key_cmp(const void *name, const void *archp) { const struct arch *arch = archp; From 0781ea923445405a45464842e9ee0e30f76cb84b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Nov 2016 12:34:26 -0300 Subject: [PATCH 03/12] perf annotate: Allow arches to have a init routine and a priv area Arches like ARM will want to use regular expressions when deciding what instructions to associate with what ins_ops, provide infrastructure for that. Reviewed-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Jiri Olsa Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7dmnk9el2ipu3nxog092k9z5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 026915a7dac8..1e96549650d7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -39,6 +39,9 @@ struct arch { size_t nr_instructions_allocated; struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); bool sorted_instructions; + bool initialized; + void *priv; + int (*init)(struct arch *arch); struct { char comment_char; char skip_functions_char; @@ -1356,6 +1359,14 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na if (arch == NULL) return -ENOTSUP; + if (arch->init) { + err = arch->init(arch); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); From acc9bfb5fae5c48ca875911d87d8d8a9d886bb66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Nov 2016 16:54:10 -0300 Subject: [PATCH 04/12] perf annotate: Improve support for ARM By using arch->init() to set up some regular expressions to associate ins_ops to ARM instructions, ditching that old table that has instructions not present on ARM. Take advantage of having an arch->init() to hide more arm specific stuff from the common code, like the objdump details. The regular expressions comes from a patch written by Kim Phillips. Reviewed-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chris Riyder Cc: David Ahern Cc: Jiri Olsa Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Pawel Moll Cc: Peter Zijlstra Cc: Russell King Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-77m7lufz9ajjimkrebtg5ead@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/annotate/instructions.c | 147 ++++++++------------ tools/perf/util/annotate.c | 9 +- 2 files changed, 60 insertions(+), 96 deletions(-) diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index d67b8aa26274..1ce0872b1726 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -1,90 +1,59 @@ -static struct ins arm__instructions[] = { - { .name = "add", .ops = &mov_ops, }, - { .name = "addl", .ops = &mov_ops, }, - { .name = "addq", .ops = &mov_ops, }, - { .name = "addw", .ops = &mov_ops, }, - { .name = "and", .ops = &mov_ops, }, - { .name = "b", .ops = &jump_ops, }, // might also be a call - { .name = "bcc", .ops = &jump_ops, }, - { .name = "bcs", .ops = &jump_ops, }, - { .name = "beq", .ops = &jump_ops, }, - { .name = "bge", .ops = &jump_ops, }, - { .name = "bgt", .ops = &jump_ops, }, - { .name = "bhi", .ops = &jump_ops, }, - { .name = "bl", .ops = &call_ops, }, - { .name = "bls", .ops = &jump_ops, }, - { .name = "blt", .ops = &jump_ops, }, - { .name = "blx", .ops = &call_ops, }, - { .name = "bne", .ops = &jump_ops, }, - { .name = "bts", .ops = &mov_ops, }, - { .name = "call", .ops = &call_ops, }, - { .name = "callq", .ops = &call_ops, }, - { .name = "cmp", .ops = &mov_ops, }, - { .name = "cmpb", .ops = &mov_ops, }, - { .name = "cmpl", .ops = &mov_ops, }, - { .name = "cmpq", .ops = &mov_ops, }, - { .name = "cmpw", .ops = &mov_ops, }, - { .name = "cmpxch", .ops = &mov_ops, }, - { .name = "dec", .ops = &dec_ops, }, - { .name = "decl", .ops = &dec_ops, }, - { .name = "imul", .ops = &mov_ops, }, - { .name = "inc", .ops = &dec_ops, }, - { .name = "incl", .ops = &dec_ops, }, - { .name = "ja", .ops = &jump_ops, }, - { .name = "jae", .ops = &jump_ops, }, - { .name = "jb", .ops = &jump_ops, }, - { .name = "jbe", .ops = &jump_ops, }, - { .name = "jc", .ops = &jump_ops, }, - { .name = "jcxz", .ops = &jump_ops, }, - { .name = "je", .ops = &jump_ops, }, - { .name = "jecxz", .ops = &jump_ops, }, - { .name = "jg", .ops = &jump_ops, }, - { .name = "jge", .ops = &jump_ops, }, - { .name = "jl", .ops = &jump_ops, }, - { .name = "jle", .ops = &jump_ops, }, - { .name = "jmp", .ops = &jump_ops, }, - { .name = "jmpq", .ops = &jump_ops, }, - { .name = "jna", .ops = &jump_ops, }, - { .name = "jnae", .ops = &jump_ops, }, - { .name = "jnb", .ops = &jump_ops, }, - { .name = "jnbe", .ops = &jump_ops, }, - { .name = "jnc", .ops = &jump_ops, }, - { .name = "jne", .ops = &jump_ops, }, - { .name = "jng", .ops = &jump_ops, }, - { .name = "jnge", .ops = &jump_ops, }, - { .name = "jnl", .ops = &jump_ops, }, - { .name = "jnle", .ops = &jump_ops, }, - { .name = "jno", .ops = &jump_ops, }, - { .name = "jnp", .ops = &jump_ops, }, - { .name = "jns", .ops = &jump_ops, }, - { .name = "jnz", .ops = &jump_ops, }, - { .name = "jo", .ops = &jump_ops, }, - { .name = "jp", .ops = &jump_ops, }, - { .name = "jpe", .ops = &jump_ops, }, - { .name = "jpo", .ops = &jump_ops, }, - { .name = "jrcxz", .ops = &jump_ops, }, - { .name = "js", .ops = &jump_ops, }, - { .name = "jz", .ops = &jump_ops, }, - { .name = "lea", .ops = &mov_ops, }, - { .name = "lock", .ops = &lock_ops, }, - { .name = "mov", .ops = &mov_ops, }, - { .name = "movb", .ops = &mov_ops, }, - { .name = "movdqa", .ops = &mov_ops, }, - { .name = "movl", .ops = &mov_ops, }, - { .name = "movq", .ops = &mov_ops, }, - { .name = "movslq", .ops = &mov_ops, }, - { .name = "movzbl", .ops = &mov_ops, }, - { .name = "movzwl", .ops = &mov_ops, }, - { .name = "nop", .ops = &nop_ops, }, - { .name = "nopl", .ops = &nop_ops, }, - { .name = "nopw", .ops = &nop_ops, }, - { .name = "or", .ops = &mov_ops, }, - { .name = "orl", .ops = &mov_ops, }, - { .name = "test", .ops = &mov_ops, }, - { .name = "testb", .ops = &mov_ops, }, - { .name = "testl", .ops = &mov_ops, }, - { .name = "xadd", .ops = &mov_ops, }, - { .name = "xbeginl", .ops = &jump_ops, }, - { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, +#include +#include + +struct arm_annotate { + regex_t call_insn, + jump_insn; }; + +static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const char *name) +{ + struct arm_annotate *arm = arch->priv; + struct ins_ops *ops; + regmatch_t match[2]; + + if (!regexec(&arm->call_insn, name, 2, match, 0)) + ops = &call_ops; + else if (!regexec(&arm->jump_insn, name, 2, match, 0)) + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int arm__annotate_init(struct arch *arch) +{ + struct arm_annotate *arm; + int err; + + if (arch->initialized) + return 0; + + arm = zalloc(sizeof(*arm)); + if (!arm) + return -1; + +#define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)" + err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED); + if (err) + goto out_free_arm; + err = regcomp(&arm->jump_insn, "^bx?" ARM_CONDS "?$", REG_EXTENDED); + if (err) + goto out_free_call; +#undef ARM_CONDS + + arch->initialized = true; + arch->priv = arm; + arch->associate_instruction_ops = arm__associate_instruction_ops; + arch->objdump.comment_char = ';'; + arch->objdump.skip_functions_char = '+'; + return 0; + +out_free_call: + regfree(&arm->call_insn); +out_free_arm: + free(arm); + return -1; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1e96549650d7..bad44db7dd67 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -84,7 +84,7 @@ static int arch__grow_instructions(struct arch *arch) goto out_update_instructions; } -static __maybe_unused int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) +static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) { struct ins *ins; @@ -110,12 +110,7 @@ static __maybe_unused int arch__associate_ins_ops(struct arch* arch, const char static struct arch architectures[] = { { .name = "arm", - .instructions = arm__instructions, - .nr_instructions = ARRAY_SIZE(arm__instructions), - .objdump = { - .comment_char = ';', - .skip_functions_char = '+', - }, + .init = arm__annotate_init, }, { .name = "x86", From dbdebdc53822c38cc29b11f438f9bc70d7e18be2 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 23 Nov 2016 21:33:46 +0530 Subject: [PATCH 05/12] perf annotate: Initial PowerPC support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support the PowerPC architecture using the ins_ops association method. Committer notes: Testing it with a perf.data file collected on a PowerPC machine and cross-annotated on a x86_64 workstation, using the associated vmlinux file: $ perf report -i perf.data.f22vm.powerdev --vmlinux vmlinux.powerpc .ktime_get vmlinux.powerpc │ clrldi r9,r28,63 8.57 │ ┌──bne e0 <- TUI cursor positioned here │54:│ lwsync 2.86 │ │ std r2,40(r1) │ │ ld r9,144(r31) │ │ ld r3,136(r31) │ │ ld r30,184(r31) │ │ ld r10,0(r9) │ │ mtctr r10 │ │ ld r2,8(r9) 8.57 │ │→ bctrl │ │ ld r2,40(r1) │ │ ld r10,160(r31) │ │ ld r5,152(r31) │ │ lwz r7,168(r31) │ │ ld r9,176(r31) 8.57 │ │ lwz r6,172(r31) │ │ lwsync 2.86 │ │ lwz r8,128(r31) │ │ cmpw cr7,r8,r28 2.86 │ │↑ bne 48 │ │ subf r10,r10,r3 │ │ mr r3,r29 │ │ and r10,r10,r5 2.86 │ │ mulld r10,r10,r7 │ │ add r9,r10,r9 │ │ srd r9,r9,r6 │ │ add r9,r9,r30 │ │ std r9,0(r29) │ │ addi r1,r1,144 │ │ ld r0,16(r1) │ │ ld r28,-32(r1) │ │ ld r29,-24(r1) │ │ ld r30,-16(r1) │ │ mtlr r0 │ │ ld r31,-8(r1) │ │← blr 5.71 │e0:└─→mr r1,r1 11.43 │ mr r2,r2 11.43 │ lwz r28,128(r31) Press 'h' for help on key bindings $ perf report -i perf.data.f22vm.powerdev --header-only # ======== # captured on: Thu Nov 24 12:40:38 2016 # hostname : pdev-f22-qemu # os release : 4.4.10-200.fc22.ppc64 # perf version : 4.9.rc1.g6298ce # arch : ppc64 # nrcpus online : 48 # nrcpus avail : 48 # cpudesc : POWER7 (architected), altivec supported # cpuid : 74,513 # total memory : 4158976 kB # cmdline : /home/ravi/Workspace/linux/tools/perf/perf record -a # event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1 # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # pmu mappings: cpu = 4, software = 1, tracepoint = 2, breakpoint = 5 # missing features: HEADER_TRACING_DATA HEADER_BRANCH_STACK HEADER_GROUP_DESC HEADER_AUXTRACE HEADER_STAT HEADER_CACHE # ======== # $ Signed-off-by: Ravi Bangoria Signed-off-by: Naveen N. Rao Tested-by: Arnaldo Carvalho de Melo Cc: Kim Phillips Link: http://lkml.kernel.org/n/tip-tbjnp40ddoxxl474uvhwi6g4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/arch/powerpc/annotate/instructions.c | 58 +++++++++++++++++++ tools/perf/util/annotate.c | 5 ++ 2 files changed, 63 insertions(+) create mode 100644 tools/perf/arch/powerpc/annotate/instructions.c diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c new file mode 100644 index 000000000000..3c4004db81b9 --- /dev/null +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -0,0 +1,58 @@ +static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name) +{ + int i; + struct ins_ops *ops; + + /* + * - Interested only if instruction starts with 'b'. + * - Few start with 'b', but aren't branch instructions. + */ + if (name[0] != 'b' || + !strncmp(name, "bcd", 3) || + !strncmp(name, "brinc", 5) || + !strncmp(name, "bper", 4)) + return NULL; + + ops = &jump_ops; + + i = strlen(name) - 1; + if (i < 0) + return NULL; + + /* ignore optional hints at the end of the instructions */ + if (name[i] == '+' || name[i] == '-') + i--; + + if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) { + /* + * if the instruction ends up with 'l' or 'la', then + * those are considered 'calls' since they update LR. + * ... except for 'bnl' which is branch if not less than + * and the absolute form of the same. + */ + if (strcmp(name, "bnl") && strcmp(name, "bnl+") && + strcmp(name, "bnl-") && strcmp(name, "bnla") && + strcmp(name, "bnla+") && strcmp(name, "bnla-")) + ops = &call_ops; + } + if (name[i] == 'r' && name[i-1] == 'l') + /* + * instructions ending with 'lr' are considered to be + * return instructions + */ + ops = &ret_ops; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int powerpc__annotate_init(struct arch *arch) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = powerpc__associate_instruction_ops; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bad44db7dd67..3e34ee0fde28 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -106,6 +106,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" +#include "arch/powerpc/annotate/instructions.c" static struct arch architectures[] = { { @@ -120,6 +121,10 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "powerpc", + .init = powerpc__annotate_init, + }, }; static void ins__delete(struct ins_operands *ops) From 2d9bbf6eb3825739efa9e91c256ce7ead60d8367 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Nov 2016 10:11:13 +0900 Subject: [PATCH 06/12] perf callchain: Add option to skip ignore symbol when printing callchains For tracepoint events, callchains always contain certain functions. Sometimes it'd be better to skip those functions as they have no value. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20161124011114.7102-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 3 ++- tools/perf/util/evsel.h | 1 + tools/perf/util/evsel_fprintf.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 829468defa07..43fcc13e402d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1876,7 +1876,8 @@ static void timehist_print_sample(struct perf_sched *sched, sample__fprintf_sym(sample, al, 0, EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | - EVSEL__PRINT_CALLCHAIN_ARROW, + EVSEL__PRINT_CALLCHAIN_ARROW | + EVSEL__PRINT_SKIP_IGNORED, &callchain_cursor, stdout); out: diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 27fa3a343577..6abb89cd27f9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -392,6 +392,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, #define EVSEL__PRINT_SRCLINE (1<<5) #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) #define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) +#define EVSEL__PRINT_SKIP_IGNORED (1<<8) struct callchain_cursor; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 53bb614feafb..5a6f52284452 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -109,6 +109,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW; + int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED; char s = print_oneline ? ' ' : '\t'; bool first = true; @@ -124,6 +125,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!node) break; + if (node->sym && node->sym->ignore && print_skip_ignored) + goto next; + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); if (print_arrow && !first) @@ -162,8 +166,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); - callchain_cursor_advance(cursor); first = false; +next: + callchain_cursor_advance(cursor); } } From cdeb01bf7863718bbbbdac2a2c3a12b62366757a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Nov 2016 10:11:12 +0900 Subject: [PATCH 07/12] perf sched timehist: Mark schedule function in callchains The sched_switch event always captured from the scheduler function. So it'd be great omit them from the callchain. This patch marks the functions to be omitted by later patch. Committer notes: Testing it: Before: [root@jouet experimental]# perf sched record -g ls Dockerfile perf.data x-mips64 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.355 MB perf.data (29 samples) ] [root@jouet experimental]# perf sched timehist time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) ----------- ----- ----------------- ------ ------ ------ 6.494998 [001] 0.000 0.000 0.000 6.495027 [002] perf[519] 0.000 0.000 0.000 __schedule <- schedule <- schedule_hrtimeout_range_clock <- schedule_hrtimeou 6.495096 [003] 0.000 0.000 0.000 6.495100 [003] rcuos/0[9] 0.000 0.005 0.003 __schedule <- schedule <- rcu_nocb_kthread <- kthread <- ret_from_fork 6.495113 [001] perf[520] 0.000 0.008 0.114 __schedule <- preempt_schedule_common <- _cond_resched <- wait_for_completion 6.495121 [000] 0.000 0.000 0.000 6.495129 [001] migration/1[17] 0.000 0.003 0.016 __schedule <- schedule <- smpboot_thread_fn <- kthread <- ret_from_fork 6.496085 [002] 0.000 0.000 1.057 6.496096 [002] kworker/u16:1[31169] 0.000 0.004 0.011 __schedule <- schedule <- worker_thread <- kthread <- ret_from_fork 6.496096 [003] 0.003 0.000 0.996 6.496169 [002] 0.011 0.000 0.072 6.496171 [000] ls[520] 0.008 0.000 1.049 __schedule <- schedule <- do_exit <- do_group_exit <- [unknown] 6.496172 [003] gnome-terminal-[4391] 0.000 0.003 0.076 __schedule <- schedule <- schedule_hrtimeout_range_clock <- schedule_hrtimeo After: [root@jouet experimental]# perf sched timehist time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) ----------- ----- ----------------- ----- ----- ------ 6.494998 [001] 0.000 0.000 0.000 6.495027 [002] perf[519] 0.000 0.000 0.000 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_t 6.495096 [003] 0.000 0.000 0.000 6.495100 [003] rcuos/0[9] 0.000 0.005 0.003 rcu_nocb_kthread <- kthread <- ret_from_fork 6.495113 [001] perf[520] 0.000 0.008 0.114 preempt_schedule_common <- _cond_resched <- wait_for_completion <- stop_one_c 6.495121 [000] 0.000 0.000 0.000 6.495129 [001] migration/1[17] 0.000 0.003 0.016 smpboot_thread_fn <- kthread <- ret_from_fork 6.496085 [002] 0.000 0.000 1.057 6.496096 [002] kworker/u16:1[31169] 0.000 0.004 0.011 worker_thread <- kthread <- ret_from_fork 6.496096 [003] 0.003 0.000 0.996 6.496169 [002] 0.011 0.000 0.072 6.496171 [000] ls[520] 0.008 0.000 1.049 do_exit <- do_group_exit <- [unknown] 6.496172 [003] gnome-terminal-[4391] 0.000 0.003 0.076 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_ [root@jouet experimental]# Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20161124011114.7102-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 21 +++++++++++++++++++++ tools/perf/util/symbol.h | 1 + 2 files changed, 22 insertions(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 43fcc13e402d..06be809a02ab 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1966,7 +1966,28 @@ static bool is_idle_sample(struct perf_sched *sched, return false; } + callchain_cursor_commit(cursor); + + while (true) { + struct callchain_cursor_node *node; + struct symbol *sym; + + node = callchain_cursor_current(cursor); + if (node == NULL) + break; + + sym = node->sym; + if (sym && sym->name) { + if (!strcmp(sym->name, "schedule") || + !strcmp(sym->name, "__schedule") || + !strcmp(sym->name, "preempt_schedule")) + sym->ignore = 1; + } + + callchain_cursor_advance(cursor); + } + return false; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index dec7e2d44885..1bcbefc0c325 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -58,6 +58,7 @@ struct symbol { u16 namelen; u8 binding; u8 idle:1; + u8 ignore:1; u8 arch_sym; char name[0]; }; From 8388deb3ba4d36ffcae91a2a01cb2ea6f27553e6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 24 Nov 2016 10:11:14 +0900 Subject: [PATCH 08/12] perf sched timehist: Enlarge max stack depth by 2 When it records callchains, they will always have 2 scheduler functions (__schedule + schedule or __schedule + preempt_schedule) and get ignored. So it should collect 2 more functions to show the expected number of callchains to user. Committer Notes: Example of final result, using the same perf.data file as in the previous cset comment, but this time redirecting the output of 'perf sched timehist' to a file instead of copy'n'pasting from xterm: [root@jouet experimental]# perf sched timehist > /tmp/bla [root@jouet experimental]# cat /tmp/bla time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) -------- ---- -------------------- ------ ------ ----- 6.494998 [01] 0.000 0.000 0.000 6.495027 [02] perf[519] 0.000 0.000 0.000 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 6.495096 [03] 0.000 0.000 0.000 6.495100 [03] rcuos/0[9] 0.000 0.005 0.003 rcu_nocb_kthread <- kthread <- ret_from_fork 6.495113 [01] perf[520] 0.000 0.008 0.114 preempt_schedule_common <- _cond_resched <- wait_for_completion <- stop_one_cpu <- sched_exec <- do_execveat_common.isra.35 6.495121 [00] 0.000 0.000 0.000 6.495129 [01] migration/1[17] 0.000 0.003 0.016 smpboot_thread_fn <- kthread <- ret_from_fork 6.496085 [02] 0.000 0.000 1.057 6.496096 [02] kworker/u16:1[31169] 0.000 0.004 0.011 worker_thread <- kthread <- ret_from_fork 6.496096 [03] 0.003 0.000 0.996 6.496169 [02] 0.011 0.000 0.072 6.496171 [00] ls[520] 0.008 0.000 1.049 do_exit <- do_group_exit <- [unknown] <- entry_SYSCALL_64_fastpath 6.496172 [03] gnome-terminal-[4391] 0.000 0.003 0.076 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20161124011114.7102-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 06be809a02ab..a49a032f5b15 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1960,7 +1960,7 @@ static bool is_idle_sample(struct perf_sched *sched, return false; if (thread__resolve_callchain(thread, cursor, evsel, sample, - NULL, NULL, sched->max_stack) != 0) { + NULL, NULL, sched->max_stack + 2) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); From d18acd15c6dfb669f0463afa31ac5343594d2fe2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 15 Nov 2016 04:05:44 +0000 Subject: [PATCH 09/12] perf tools: Fix kernel version error in ubuntu On ubuntu the internal kernel version code is different from what can be retrived from uname: $ uname -r 4.4.0-47-generic $ cat /lib/modules/`uname -r`/build/include/generated/uapi/linux/version.h #define LINUX_VERSION_CODE 263192 #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) $ cat /lib/modules/`uname -r`/build/include/generated/utsrelease.h #define UTS_RELEASE "4.4.0-47-generic" #define UTS_UBUNTU_RELEASE_ABI 47 $ cat /proc/version_signature Ubuntu 4.4.0-47.68-generic 4.4.24 The macro LINUX_VERSION_CODE is set to 4.4.24 (263192 == 0x40418), but `uname -r` reports 4.4.0. This mismatch causes LINUX_VERSION_CODE macro passed to BPF script become an incorrect value, results in magic failure in BPF loading: $ sudo ./buildperf/perf record -e ./tools/perf/tests/bpf-script-example.c ls event syntax error: './tools/perf/tests/bpf-script-example.c' \___ Failed to load program for unknown reason According to Ubuntu document (https://wiki.ubuntu.com/Kernel/FAQ), the correct kernel version can be retrived through /proc/version_signature, which is ubuntu specific. This patch checks the existance of /proc/version_signature, and returns version number through parsing this file instead of uname. Version string is untouched (value returns from uname) because `uname -r` is required to be consistence with path of kbuild directory in /lib/module. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/20161115040617.69788-2-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 55 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5bbd1f609f1f..67ac765da27a 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -637,12 +637,63 @@ bool find_process(const char *name) return ret ? false : true; } +static int +fetch_ubuntu_kernel_version(unsigned int *puint) +{ + ssize_t len; + size_t line_len = 0; + char *ptr, *line = NULL; + int version, patchlevel, sublevel, err; + FILE *vsig = fopen("/proc/version_signature", "r"); + + if (!vsig) { + pr_debug("Open /proc/version_signature failed: %s\n", + strerror(errno)); + return -1; + } + + len = getline(&line, &line_len, vsig); + fclose(vsig); + err = -1; + if (len <= 0) { + pr_debug("Reading from /proc/version_signature failed: %s\n", + strerror(errno)); + goto errout; + } + + ptr = strrchr(line, ' '); + if (!ptr) { + pr_debug("Parsing /proc/version_signature failed: %s\n", line); + goto errout; + } + + err = sscanf(ptr + 1, "%d.%d.%d", + &version, &patchlevel, &sublevel); + if (err != 3) { + pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n", + line); + goto errout; + } + + if (puint) + *puint = (version << 16) + (patchlevel << 8) + sublevel; + err = 0; +errout: + free(line); + return err; +} + int fetch_kernel_version(unsigned int *puint, char *str, size_t str_size) { struct utsname utsname; int version, patchlevel, sublevel, err; + bool int_ver_ready = false; + + if (access("/proc/version_signature", R_OK) == 0) + if (!fetch_ubuntu_kernel_version(puint)) + int_ver_ready = true; if (uname(&utsname)) return -1; @@ -656,12 +707,12 @@ fetch_kernel_version(unsigned int *puint, char *str, &version, &patchlevel, &sublevel); if (err != 3) { - pr_debug("Unablt to get kernel version from uname '%s'\n", + pr_debug("Unable to get kernel version from uname '%s'\n", utsname.release); return -1; } - if (puint) + if (puint && !int_ver_ready) *puint = (version << 16) + (patchlevel << 8) + sublevel; return 0; } From 3dbe46c5245f61328797738c6a0a6cd4bf921f61 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 15 Nov 2016 04:05:45 +0000 Subject: [PATCH 10/12] perf record: Fix segfault when running with suid and kptr_restrict is 1 Before this patch perf panics if kptr_restrict is set to 1 and perf is owned by root with suid set: $ whoami wangnan $ ls -l ./perf -rwsr-xr-x 1 root root 19781908 Sep 21 19:29 /home/wangnan/perf $ cat /proc/sys/kernel/kptr_restrict 1 $ cat /proc/sys/kernel/perf_event_paranoid -1 $ ./perf record -a Segmentation fault (core dumped) $ The reason is that perf assumes it is allowed to read kptr from /proc/kallsyms when euid is root, but in fact the kernel doesn't allow reading kptr when euid and uid do not match with each other: $ cp /bin/cat . $ sudo chown root:root ./cat $ sudo chmod u+s ./cat $ cat /proc/kallsyms | grep do_fork 0000000000000000 T _do_fork <--- kptr is hidden even euid is root $ sudo cat /proc/kallsyms | grep do_fork ffffffff81080230 T _do_fork See lib/vsprintf.c for kernel side code. This patch fixes this problem by checking both uid and euid. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: He Kuang Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/20161115040617.69788-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index aecff69a510d..420ada9de22f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1962,7 +1962,7 @@ static bool symbol__read_kptr_restrict(void) char line[8]; if (fgets(line, sizeof(line), fp) != NULL) - value = (geteuid() != 0) ? + value = ((geteuid() != 0) || (getuid() != 0)) ? (atoi(line) != 0) : (atoi(line) == 2); From d6be16719e0b65f586ae4a301f02407422e6b5dd Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 15 Nov 2016 04:05:46 +0000 Subject: [PATCH 11/12] perf tools: Add missing struct definition in probe_event.h Commit 0b3c2264ae30 ("perf symbols: Fix kallsyms perf test on ppc64le") refers struct symbol in probe_event.h, but forgets to include its definition. Gcc will complain about it when that definition is not added, by sheer luck, by some other header included before probe_event.h. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Naveen N. Rao Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/20161115040617.69788-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 8091d15113f7..5d4e94061402 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -18,6 +18,8 @@ struct probe_conf { extern struct probe_conf probe_conf; extern bool probe_event_dry_run; +struct symbol; + /* kprobe-tracer and uprobe-tracer tracing point */ struct probe_trace_point { char *realname; /* function real name (if needed) */ From 4708bbda5cb2f6cdc331744597527143f46394d5 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 15 Nov 2016 04:05:47 +0000 Subject: [PATCH 12/12] tools lib bpf: Fix maps resolution It is not correct to assimilate the elf data of the maps section to an array of map definition. In fact the sizes differ. The offset provided in the symbol section has to be used instead. This patch fixes a bug causing a elf with two maps not to load correctly. Wang Nan added: This patch requires a name for each BPF map, so array of BPF maps is not allowed. This restriction is reasonable, because kernel verifier forbid indexing BPF map from such array unless the index is a fixed value, but if the index is fixed why not merging it into name? For example: Program like this: ... unsigned long cpu = get_smp_processor_id(); int *pval = map_lookup_elem(&map_array[cpu], &key); ... Generates bytecode like this: 0: (b7) r1 = 0 1: (63) *(u32 *)(r10 -4) = r1 2: (b7) r1 = 680997 3: (63) *(u32 *)(r10 -8) = r1 4: (85) call 8 5: (67) r0 <<= 4 6: (18) r1 = 0x112dd000 8: (0f) r0 += r1 9: (bf) r2 = r10 10: (07) r2 += -4 11: (bf) r1 = r0 12: (85) call 1 Where instruction 8 is the computation, 8 and 11 render r1 to an invalid value for function map_lookup_elem, causes verifier report error. Signed-off-by: Eric Leblond Cc: Alexei Starovoitov Cc: He Kuang Cc: Wang Nan [ Merge bpf_object__init_maps_name into bpf_object__init_maps. Fix segfault for buggy BPF script Validate obj->maps ] Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/20161115040617.69788-5-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.c | 146 ++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 46 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b699aea9a025..96a2b2ff1212 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -185,6 +185,7 @@ struct bpf_program { struct bpf_map { int fd; char *name; + size_t offset; struct bpf_map_def def; void *priv; bpf_map_clear_priv_t clear_priv; @@ -513,20 +514,83 @@ bpf_object__init_kversion(struct bpf_object *obj, } static int -bpf_object__init_maps(struct bpf_object *obj, void *data, - size_t size) +bpf_object__validate_maps(struct bpf_object *obj) { - size_t nr_maps; int i; - nr_maps = size / sizeof(struct bpf_map_def); - if (!data || !nr_maps) { - pr_debug("%s doesn't need map definition\n", - obj->path); + /* + * If there's only 1 map, the only error case should have been + * catched in bpf_object__init_maps(). + */ + if (!obj->maps || !obj->nr_maps || (obj->nr_maps == 1)) return 0; + + for (i = 1; i < obj->nr_maps; i++) { + const struct bpf_map *a = &obj->maps[i - 1]; + const struct bpf_map *b = &obj->maps[i]; + + if (b->offset - a->offset < sizeof(struct bpf_map_def)) { + pr_warning("corrupted map section in %s: map \"%s\" too small\n", + obj->path, a->name); + return -EINVAL; + } + } + return 0; +} + +static int compare_bpf_map(const void *_a, const void *_b) +{ + const struct bpf_map *a = _a; + const struct bpf_map *b = _b; + + return a->offset - b->offset; +} + +static int +bpf_object__init_maps(struct bpf_object *obj) +{ + int i, map_idx, nr_maps = 0; + Elf_Scn *scn; + Elf_Data *data; + Elf_Data *symbols = obj->efile.symbols; + + if (obj->efile.maps_shndx < 0) + return -EINVAL; + if (!symbols) + return -EINVAL; + + scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warning("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; } - pr_debug("maps in %s: %zd bytes\n", obj->path, size); + /* + * Count number of maps. Each map has a name. + * Array of maps is not supported: only the first element is + * considered. + * + * TODO: Detect array of map and report error. + */ + for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != obj->efile.maps_shndx) + continue; + nr_maps++; + } + + /* Alloc obj->maps and fill nr_maps. */ + pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, + nr_maps, data->d_size); + + if (!nr_maps) + return 0; obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { @@ -535,35 +599,21 @@ bpf_object__init_maps(struct bpf_object *obj, void *data, } obj->nr_maps = nr_maps; - for (i = 0; i < nr_maps; i++) { - struct bpf_map_def *def = &obj->maps[i].def; - - /* - * fill all fd with -1 so won't close incorrect - * fd (fd=0 is stdin) when failure (zclose won't close - * negative fd)). - */ + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ + for (i = 0; i < nr_maps; i++) obj->maps[i].fd = -1; - /* Save map definition into obj->maps */ - *def = ((struct bpf_map_def *)data)[i]; - } - return 0; -} - -static int -bpf_object__init_maps_name(struct bpf_object *obj) -{ - int i; - Elf_Data *symbols = obj->efile.symbols; - - if (!symbols || obj->efile.maps_shndx < 0) - return -EINVAL; - - for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + /* + * Fill obj->maps using data in "maps" section. + */ + for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { GElf_Sym sym; - size_t map_idx; const char *map_name; + struct bpf_map_def *def; if (!gelf_getsym(symbols, i, &sym)) continue; @@ -573,21 +623,27 @@ bpf_object__init_maps_name(struct bpf_object *obj) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); - map_idx = sym.st_value / sizeof(struct bpf_map_def); - if (map_idx >= obj->nr_maps) { - pr_warning("index of map \"%s\" is buggy: %zu > %zu\n", - map_name, map_idx, obj->nr_maps); - continue; + obj->maps[map_idx].offset = sym.st_value; + if (sym.st_value + sizeof(struct bpf_map_def) > data->d_size) { + pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", + obj->path, map_name); + return -EINVAL; } + obj->maps[map_idx].name = strdup(map_name); if (!obj->maps[map_idx].name) { pr_warning("failed to alloc map name\n"); return -ENOMEM; } - pr_debug("map %zu is \"%s\"\n", map_idx, + pr_debug("map %d is \"%s\"\n", map_idx, obj->maps[map_idx].name); + def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + obj->maps[map_idx].def = *def; + map_idx++; } - return 0; + + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); + return bpf_object__validate_maps(obj); } static int bpf_object__elf_collect(struct bpf_object *obj) @@ -645,11 +701,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); - else if (strcmp(name, "maps") == 0) { - err = bpf_object__init_maps(obj, data->d_buf, - data->d_size); + else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; - } else if (sh.sh_type == SHT_SYMTAB) { + else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", obj->path); @@ -698,7 +752,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return LIBBPF_ERRNO__FORMAT; } if (obj->efile.maps_shndx >= 0) - err = bpf_object__init_maps_name(obj); + err = bpf_object__init_maps(obj); out: return err; } @@ -807,7 +861,7 @@ bpf_object__create_maps(struct bpf_object *obj) zclose(obj->maps[j].fd); return err; } - pr_debug("create map: fd=%d\n", *pfd); + pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd); } return 0;