mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-05 06:18:45 -04:00
Merge branch 'Use this_cpu_xxx for preemption-safety'
Hou Tao says: ==================== From: Hou Tao <houtao1@huawei.com> Hi, The patchset aims to make the updates of per-cpu prog->active and per-cpu bpf_task_storage_busy preemption-safe. The problem is that on some architectures (e.g. arm64), __this_cpu_{inc|dec|inc_return} are neither preemption-safe nor IRQ-safe, so under a fully preemptible kernel the concurrent updates on these per-cpu variables may be interleaved and the final values of these variables may not be zero. Patch 1 & 2 use the preemption-safe per-cpu helpers to manipulate prog->active and bpf_task_storage_busy. Patch 3 & 4 add a test case in map_tests to show the concurrent updates on the per-cpu bpf_task_storage_busy by using __this_cpu_{inc|dec} are not atomic. Comments are always welcome. Regards, Tao Change Log: v2: * Patch 1: update commit message to indicate the problem is only possible for a fully preemptible kernel * Patch 2: a new patch which fixes the problem for prog->active * Patch 3 & 4: move it to test_maps and make it depend on CONFIG_PREEMPT v1: https://lore.kernel.org/bpf/20220829142752.330094-1-houtao@huaweicloud.com/ ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
@@ -555,11 +555,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
|
||||
struct bpf_local_storage_elem, map_node))) {
|
||||
if (busy_counter) {
|
||||
migrate_disable();
|
||||
__this_cpu_inc(*busy_counter);
|
||||
this_cpu_inc(*busy_counter);
|
||||
}
|
||||
bpf_selem_unlink(selem, false);
|
||||
if (busy_counter) {
|
||||
__this_cpu_dec(*busy_counter);
|
||||
this_cpu_dec(*busy_counter);
|
||||
migrate_enable();
|
||||
}
|
||||
cond_resched_rcu();
|
||||
|
||||
@@ -26,20 +26,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);
|
||||
static void bpf_task_storage_lock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
__this_cpu_inc(bpf_task_storage_busy);
|
||||
this_cpu_inc(bpf_task_storage_busy);
|
||||
}
|
||||
|
||||
static void bpf_task_storage_unlock(void)
|
||||
{
|
||||
__this_cpu_dec(bpf_task_storage_busy);
|
||||
this_cpu_dec(bpf_task_storage_busy);
|
||||
migrate_enable();
|
||||
}
|
||||
|
||||
static bool bpf_task_storage_trylock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
|
||||
__this_cpu_dec(bpf_task_storage_busy);
|
||||
if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
|
||||
this_cpu_dec(bpf_task_storage_busy);
|
||||
migrate_enable();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -895,7 +895,7 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *ru
|
||||
|
||||
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
|
||||
|
||||
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
inc_misses_counter(prog);
|
||||
return 0;
|
||||
}
|
||||
@@ -930,7 +930,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
|
||||
update_prog_stats(prog, start);
|
||||
__this_cpu_dec(*(prog->active));
|
||||
this_cpu_dec(*(prog->active));
|
||||
migrate_enable();
|
||||
rcu_read_unlock();
|
||||
}
|
||||
@@ -966,7 +966,7 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r
|
||||
migrate_disable();
|
||||
might_fault();
|
||||
|
||||
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
inc_misses_counter(prog);
|
||||
return 0;
|
||||
}
|
||||
@@ -982,7 +982,7 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
|
||||
update_prog_stats(prog, start);
|
||||
__this_cpu_dec(*(prog->active));
|
||||
this_cpu_dec(*(prog->active));
|
||||
migrate_enable();
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
122
tools/testing/selftests/bpf/map_tests/task_storage_map.c
Normal file
122
tools/testing/selftests/bpf/map_tests/task_storage_map.c
Normal file
@@ -0,0 +1,122 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
|
||||
#include "test_maps.h"
|
||||
#include "task_local_storage_helpers.h"
|
||||
#include "read_bpf_task_storage_busy.skel.h"
|
||||
|
||||
/* State shared between the main test thread and its lookup threads. */
struct lookup_ctx {
	bool start;	/* set by main thread to release the workers */
	bool stop;	/* set to make workers bail out early */
	int pid_fd;	/* pidfd of the test process, used as map key */
	int map_fd;	/* fd of the task storage map under test */
	int loop;	/* number of lookups each worker performs */
};
|
||||
|
||||
static void *lookup_fn(void *arg)
|
||||
{
|
||||
struct lookup_ctx *ctx = arg;
|
||||
long value;
|
||||
int i = 0;
|
||||
|
||||
while (!ctx->start)
|
||||
usleep(1);
|
||||
|
||||
while (!ctx->stop && i++ < ctx->loop)
|
||||
bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
ctx->stop = true;
|
||||
ctx->start = true;
|
||||
for (i = 0; i < nr; i++)
|
||||
pthread_join(tids[i], NULL);
|
||||
}
|
||||
|
||||
void test_task_storage_map_stress_lookup(void)
|
||||
{
|
||||
#define MAX_NR_THREAD 4096
|
||||
unsigned int i, nr = 256, loop = 8192, cpu = 0;
|
||||
struct read_bpf_task_storage_busy *skel;
|
||||
pthread_t tids[MAX_NR_THREAD];
|
||||
struct lookup_ctx ctx;
|
||||
cpu_set_t old, new;
|
||||
const char *cfg;
|
||||
int err;
|
||||
|
||||
cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
|
||||
if (cfg) {
|
||||
nr = atoi(cfg);
|
||||
if (nr > MAX_NR_THREAD)
|
||||
nr = MAX_NR_THREAD;
|
||||
}
|
||||
cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
|
||||
if (cfg)
|
||||
loop = atoi(cfg);
|
||||
cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
|
||||
if (cfg)
|
||||
cpu = atoi(cfg);
|
||||
|
||||
skel = read_bpf_task_storage_busy__open_and_load();
|
||||
err = libbpf_get_error(skel);
|
||||
CHECK(err, "open_and_load", "error %d\n", err);
|
||||
|
||||
/* Only for a fully preemptible kernel */
|
||||
if (!skel->kconfig->CONFIG_PREEMPT)
|
||||
return;
|
||||
|
||||
/* Save the old affinity setting */
|
||||
sched_getaffinity(getpid(), sizeof(old), &old);
|
||||
|
||||
/* Pinned on a specific CPU */
|
||||
CPU_ZERO(&new);
|
||||
CPU_SET(cpu, &new);
|
||||
sched_setaffinity(getpid(), sizeof(new), &new);
|
||||
|
||||
ctx.start = false;
|
||||
ctx.stop = false;
|
||||
ctx.pid_fd = sys_pidfd_open(getpid(), 0);
|
||||
ctx.map_fd = bpf_map__fd(skel->maps.task);
|
||||
ctx.loop = loop;
|
||||
for (i = 0; i < nr; i++) {
|
||||
err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
|
||||
if (err) {
|
||||
abort_lookup(&ctx, tids, i);
|
||||
CHECK(err, "pthread_create", "error %d\n", err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ctx.start = true;
|
||||
for (i = 0; i < nr; i++)
|
||||
pthread_join(tids[i], NULL);
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
err = read_bpf_task_storage_busy__attach(skel);
|
||||
CHECK(err, "attach", "error %d\n", err);
|
||||
|
||||
/* Trigger program */
|
||||
syscall(SYS_gettid);
|
||||
skel->bss->pid = 0;
|
||||
|
||||
CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
|
||||
out:
|
||||
read_bpf_task_storage_busy__destroy(skel);
|
||||
/* Restore affinity setting */
|
||||
sched_setaffinity(getpid(), sizeof(old), &old);
|
||||
}
|
||||
@@ -9,18 +9,10 @@
|
||||
|
||||
#include "bprm_opts.skel.h"
|
||||
#include "network_helpers.h"
|
||||
|
||||
#ifndef __NR_pidfd_open
|
||||
#define __NR_pidfd_open 434
|
||||
#endif
|
||||
#include "task_local_storage_helpers.h"
|
||||
|
||||
static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
|
||||
|
||||
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
|
||||
{
|
||||
return syscall(__NR_pidfd_open, pid, flags);
|
||||
}
|
||||
|
||||
static int update_storage(int map_fd, int secureexec)
|
||||
{
|
||||
int task_fd, ret = 0;
|
||||
|
||||
@@ -11,15 +11,7 @@
|
||||
|
||||
#include "local_storage.skel.h"
|
||||
#include "network_helpers.h"
|
||||
|
||||
#ifndef __NR_pidfd_open
|
||||
#define __NR_pidfd_open 434
|
||||
#endif
|
||||
|
||||
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
|
||||
{
|
||||
return syscall(__NR_pidfd_open, pid, flags);
|
||||
}
|
||||
#include "task_local_storage_helpers.h"
|
||||
|
||||
static unsigned int duration;
|
||||
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
extern bool CONFIG_PREEMPT __kconfig __weak;
|
||||
extern const int bpf_task_storage_busy __ksym;
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
int pid = 0;
|
||||
int busy = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, long);
|
||||
} task SEC(".maps");
|
||||
|
||||
/* Runs on every syscall entry.  When the calling task's tgid matches the
 * user-space-configured 'pid', snapshot this CPU's bpf_task_storage_busy
 * counter into the global 'busy' so the test can check it from user space.
 * Fix: dropped the unused local variable 'key'.
 */
SEC("raw_tp/sys_enter")
int BPF_PROG(read_bpf_task_storage_busy)
{
	int *value;

	/* Interleaved counter updates are only possible on a fully
	 * preemptible kernel; skip the check otherwise.
	 */
	if (!CONFIG_PREEMPT)
		return 0;

	/* Only act on behalf of the test process. */
	if (bpf_get_current_pid_tgid() >> 32 != pid)
		return 0;

	value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
	if (value)
		busy = *value;

	return 0;
}
|
||||
18
tools/testing/selftests/bpf/task_local_storage_helpers.h
Normal file
18
tools/testing/selftests/bpf/task_local_storage_helpers.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
/* Shared helper for obtaining a pidfd in the task local storage tests. */
#ifndef __TASK_LOCAL_STORAGE_HELPER_H
#define __TASK_LOCAL_STORAGE_HELPER_H

#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

/* Fallback for libc headers that predate pidfd_open(2). */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434
#endif

/* Thin wrapper around pidfd_open(2): returns a pid file descriptor on
 * success, or -1 with errno set on failure.
 */
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	return syscall(__NR_pidfd_open, pid, flags);
}

#endif
|
||||
Reference in New Issue
Block a user