Merge branch 'bpf_get_task_stack'
Song Liu says:
====================
This set introduces a new helper, bpf_get_task_stack(). The primary use case
is to dump all /proc/*/stack to seq_file via bpf_iter__task.
A few different approaches have been explored and compared:
1. A simple wrapper around stack_trace_save_tsk(), as in v1 [1].
   This approach introduces new syntax that differs from the existing
   helper bpf_get_stack(), and is therefore not ideal.
2. Extend get_perf_callchain() to support "task" as an argument.
   This approach reuses most of bpf_get_stack(). However, extending
   get_perf_callchain() requires non-trivial, error-prone changes to
   architecture-specific code.
3. The current (v2) approach, which leverages most of the existing
   bpf_get_stack() and uses stack_trace_save_tsk() to handle the
   architecture-specific logic.
[1] https://lore.kernel.org/netdev/20200623070802.2310018-1-songliubraving@fb.com/
Changes v4 => v5:
1. Rebase and work around git-am issue. (Alexei)
2. Update commit log for 4/4. (Yonghong)
Changes v3 => v4:
1. Simplify the selftests with bpf_iter.h. (Yonghong)
2. Add example output to commit log of 4/4. (Yonghong)
Changes v2 => v3:
1. Rebase on top of bpf-next. (Yonghong)
2. Sanitize get_callchain_entry(). (Peter)
3. Use has_callchain_buf for bpf_get_task_stack. (Andrii)
4. Other small cleanups. (Yonghong, Andrii)
Changes v1 => v2:
1. Reuse most of bpf_get_stack() logic. (Andrii)
2. Fix unsigned long vs. u64 mismatch for 32-bit systems. (Yonghong)
3. Add %pB support in bpf_trace_printk(). (Daniel)
4. Fix the buffer size to be specified in bytes.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
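For context, a task-stack iterator like the one added in this series is consumed
from userspace by attaching the iter/task program and reading the resulting
iterator fd. A minimal sketch against libbpf (assuming the bpf_iter_task_stack
skeleton generated from the selftest below; bpf_program__attach_iter() and
bpf_iter_create() come from the earlier bpf_iter work, and error handling is
trimmed):

    #include <stdio.h>
    #include <unistd.h>
    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>
    #include "bpf_iter_task_stack.skel.h"

    int main(void)
    {
        struct bpf_iter_task_stack *skel;
        struct bpf_link *link;
        char buf[4096];
        int iter_fd, len;

        skel = bpf_iter_task_stack__open_and_load();
        if (!skel)
            return 1;

        /* attach the iter/task program, then turn the link into a readable fd */
        link = bpf_program__attach_iter(skel->progs.dump_task_stack, NULL);
        iter_fd = bpf_iter_create(bpf_link__fd(link));

        /* each read() produces the next chunk of seq_file output */
        while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, len, stdout);

        close(iter_fd);
        bpf_link__destroy(link);
        bpf_iter_task_stack__destroy(skel);
        return 0;
    }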
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1627,6 +1627,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_get_stack_proto;
+extern const struct bpf_func_proto bpf_get_task_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1244,6 +1244,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
+extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
+extern void put_callchain_entry(int rctx);
 
 extern int sysctl_perf_event_max_stack;
 extern int sysctl_perf_event_max_contexts_per_stack;
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3285,6 +3285,39 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *task*, which is a valid
+ *		pointer to struct task_struct. To store the stacktrace, the
+ *		bpf program provides *buf* with a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_task_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
 	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),
+	FN(skc_to_udp6_sock),		\
+	FN(get_task_stack),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
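As a usage illustration of the *flags* encoding documented above (a hypothetical
fragment, not part of this patch; assumes a valid task pointer and a buf array
in the BPF program):

    /* skip the first 2 frames and collect the user-space stack */
    long ret = bpf_get_task_stack(task, buf, sizeof(buf),
                                  (2 & BPF_F_SKIP_FIELD_MASK) |
                                  BPF_F_USER_STACK);
    if (ret < 0)
        return 0;    /* e.g. -EFAULT for kernel threads without user regs */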
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -348,6 +348,40 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}
 }
 
+static struct perf_callchain_entry *
+get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+{
+	struct perf_callchain_entry *entry;
+	int rctx;
+
+	entry = get_callchain_entry(&rctx);
+
+	if (!entry)
+		return NULL;
+
+	entry->nr = init_nr +
+		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
+				     sysctl_perf_event_max_stack - init_nr, 0);
+
+	/* stack_trace_save_tsk() works on unsigned long array, while
+	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
+	 * necessary to fix this mismatch.
+	 */
+	if (__BITS_PER_LONG != 64) {
+		unsigned long *from = (unsigned long *) entry->ip;
+		u64 *to = entry->ip;
+		int i;
+
+		/* copy data from the end to avoid using extra buffer */
+		for (i = entry->nr - 1; i >= (int)init_nr; i--)
+			to[i] = (u64)(from[i]);
+	}
+
+	put_callchain_entry(rctx);
+
+	return entry;
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
@@ -448,8 +482,8 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
-	   u64, flags)
+static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+			    void *buf, u32 size, u64 flags)
 {
 	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -471,13 +505,22 @@ BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	if (unlikely(size % elem_size))
 		goto clear;
 
+	/* cannot get valid user stack for task without user_mode regs */
+	if (task && user && !user_mode(regs))
+		goto err_fault;
+
 	num_elem = size / elem_size;
 	if (sysctl_perf_event_max_stack < num_elem)
 		init_nr = 0;
 	else
 		init_nr = sysctl_perf_event_max_stack - num_elem;
-	trace = get_perf_callchain(regs, init_nr, kernel, user,
-				   sysctl_perf_event_max_stack, false, false);
+
+	if (kernel && task)
+		trace = get_callchain_entry_for_task(task, init_nr);
+	else
+		trace = get_perf_callchain(regs, init_nr, kernel, user,
+					   sysctl_perf_event_max_stack,
+					   false, false);
 	if (unlikely(!trace))
 		goto err_fault;
 
@@ -505,6 +548,12 @@ BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	return err;
 }
 
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	return __bpf_get_stack(regs, NULL, buf, size, flags);
+}
+
 const struct bpf_func_proto bpf_get_stack_proto = {
 	.func		= bpf_get_stack,
 	.gpl_only	= true,
@@ -515,6 +564,26 @@ const struct bpf_func_proto bpf_get_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	struct pt_regs *regs = task_pt_regs(task);
+
+	return __bpf_get_stack(regs, task, buf, size, flags);
+}
+
+static int bpf_get_task_stack_btf_ids[5];
+const struct bpf_func_proto bpf_get_task_stack_proto = {
+	.func		= bpf_get_task_stack,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+	.btf_id		= bpf_get_task_stack_btf_ids,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
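A note on the 32-bit fix-up in get_callchain_entry_for_task(): stack_trace_save_tsk()
stores native unsigned long values, so on a 32-bit kernel the saved entries occupy
only the first half of the u64 array. Widening in place from the highest index
downward is safe because the 8-byte destination to[i] only overlaps the 4-byte
sources from[2i] and from[2i+1], which lie at or beyond index i and have therefore
already been consumed (from[i] itself is read before the write). A small userspace
sketch of the same transform (illustrative; uint32_t stands in for the 32-bit
unsigned long, and the pointer cast mirrors the kernel's, which builds with
-fno-strict-aliasing):

    #include <stdint.h>
    #include <stdio.h>

    /* widen nr 32-bit entries into the u64 slots they already live in */
    static void widen_in_place(uint64_t *buf, unsigned int nr)
    {
        uint32_t *from = (uint32_t *)buf;
        int i;

        /* walk from the end: to[i] only overlaps from[2i]/from[2i+1],
         * which are already consumed; from[i] is read before the write
         */
        for (i = nr - 1; i >= 0; i--)
            buf[i] = (uint64_t)from[i];
    }

    int main(void)
    {
        uint64_t buf[4];
        uint32_t *p = (uint32_t *)buf;
        int i;

        for (i = 0; i < 4; i++)
            p[i] = 0x1000 + i;          /* pretend saved stack entries */
        widen_in_place(buf, 4);
        for (i = 0; i < 4; i++)
            printf("%#llx\n", (unsigned long long)buf[i]);
        return 0;
    }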
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4864,7 +4864,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	if (err)
 		return err;
 
-	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+	if ((func_id == BPF_FUNC_get_stack ||
+	     func_id == BPF_FUNC_get_task_stack) &&
+	    !env->prog->has_callchain_buf) {
 		const char *err_str;
 
 #ifdef CONFIG_PERF_EVENTS
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -149,7 +149,7 @@ void put_callchain_buffers(void)
 	}
 }
 
-static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+struct perf_callchain_entry *get_callchain_entry(int *rctx)
 {
 	int cpu;
 	struct callchain_cpus_entries *entries;
@@ -159,8 +159,10 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 		return NULL;
 
 	entries = rcu_dereference(callchain_cpus_entries);
-	if (!entries)
+	if (!entries) {
+		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
 		return NULL;
+	}
 
 	cpu = smp_processor_id();
 
@@ -168,7 +170,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 			(*rctx * perf_callchain_entry__sizeof()));
 }
 
-static void
+void
 put_callchain_entry(int rctx)
 {
 	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
@@ -183,11 +185,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 	int rctx;
 
 	entry = get_callchain_entry(&rctx);
-	if (rctx == -1)
-		return NULL;
-
-	if (!entry)
-		goto exit_put;
+	if (!entry)
+		return NULL;
 
 	ctx.entry = entry;
 	ctx.max_stack = max_stack;
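With the failure path fixed above, a NULL return from get_callchain_entry() now
always means the recursion context needs no release by the caller, which gives the
newly exported pair a simple discipline (an illustrative sketch, mirroring
get_callchain_entry_for_task() in stackmap.c):

    int rctx;
    struct perf_callchain_entry *entry;

    entry = get_callchain_entry(&rctx);
    if (!entry)
        return NULL;    /* no put_callchain_entry() needed on failure */

    /* ... fill entry->ip[] and entry->nr ... */

    put_callchain_entry(rctx);    /* pair only with a successful get */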
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,7 +376,7 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 
 /*
  * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
  */
 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
@@ -420,6 +420,11 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 				goto fmt_str;
 			}
 
+			if (fmt[i + 1] == 'B') {
+				i++;
+				goto fmt_next;
+			}
+
 			/* disallow any further format extensions */
 			if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
@@ -636,7 +641,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 		if (fmt[i] == 'p') {
 			if (fmt[i + 1] == 0 ||
 			    fmt[i + 1] == 'K' ||
-			    fmt[i + 1] == 'x') {
+			    fmt[i + 1] == 'x' ||
+			    fmt[i + 1] == 'B') {
 				/* just kernel pointers */
 				params[fmt_cnt] = args[fmt_cnt];
 				fmt_cnt++;
@@ -1137,6 +1143,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ringbuf_query_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	case BPF_FUNC_get_task_stack:
+		return &bpf_get_task_stack_proto;
 	default:
 		return NULL;
 	}
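With %pB accepted, a tracing program can symbolize a saved kernel address
directly (an illustrative fragment; assumes addr holds a kernel text address):

    /* prints e.g. "frame: <symbol>+0x<offset>/0x<size>" to trace_pipe */
    char fmt[] = "frame: %pB\n";

    bpf_trace_printk(fmt, sizeof(fmt), addr);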
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -426,6 +426,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct task_struct',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -468,6 +469,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct task_struct',
     }
     mapped_types = {
             'u8': '__u8',
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3285,6 +3285,39 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *task*, which is a valid
+ *		pointer to struct task_struct. To store the stacktrace, the
+ *		bpf program provides *buf* with a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_task_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
 	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),
+	FN(skc_to_udp6_sock),		\
+	FN(get_task_stack),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,6 +5,7 @@
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
@@ -110,6 +111,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_stack(void)
+{
+	struct bpf_iter_task_stack *skel;
+
+	skel = bpf_iter_task_stack__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_stack);
+
+	bpf_iter_task_stack__destroy(skel);
+}
+
 static void test_task_file(void)
 {
 	struct bpf_iter_task_file *skel;
@@ -452,6 +467,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_stack"))
+		test_task_stack();
 	if (test__start_subtest("task_file"))
 		test_task_file();
 	if (test__start_subtest("tcp4"))
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH	64
+unsigned long entries[MAX_STACK_TRACE_DEPTH];
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	long i, retlen;
+
+	if (task == (void *)0)
+		return 0;
+
+	retlen = bpf_get_task_stack(task, entries,
+				    MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+	if (retlen < 0)
+		return 0;
+
+	BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+		       retlen / SIZE_OF_ULONG);
+	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+		if (retlen > i * SIZE_OF_ULONG)
+			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+	}
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
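Once this program is loaded, the stack dump can be read like a file, for
example with bpftool from the same bpf_iter series (commands illustrative):

    # bpftool iter pin ./bpf_iter_task_stack.o /sys/fs/bpf/task_stack
    # cat /sys/fs/bpf/task_stack

Per the format strings above, each task produces a block of the form (values
illustrative; %pB renders symbol+offset/size):

    pid:     1234 num_entries:        3
    [<0>] <symbol>+0x<offset>/0x<size>
    ...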