mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 03:11:11 -04:00
Merge branch 'bpf-fix-torn-writes-in-non-prealloc-htab-with-bpf_f_lock'
Mykyta Yatsenko says: ==================== bpf: Fix torn writes in non-prealloc htab with BPF_F_LOCK A torn write issue was reported in htab_map_update_elem() with BPF_F_LOCK on hash maps. The BPF_F_LOCK fast path performs a lockless lookup and copies the value under the element's embedded spin_lock. A concurrent delete can free the element via bpf_mem_cache_free(), which allows immediate reuse. When alloc_htab_elem() recycles the same memory, it writes the value with plain copy_map_value() without taking the spin_lock, racing with the stale lock holder and producing torn writes. Patch 1 fixes alloc_htab_elem() to use copy_map_value_locked() when BPF_F_LOCK is set. Patch 2 adds a selftest that reliably detects the torn writes on an unpatched kernel. Reported-by: Aaron Esau <aaron1esau@gmail.com> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com> ==================== Link: https://patch.msgid.link/20260401-bpf_map_torn_writes-v1-0-782d071c55e7@meta.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -1138,6 +1138,10 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
} else if (fd_htab_map_needs_adjust(htab)) {
|
||||
size = round_up(size, 8);
|
||||
memcpy(htab_elem_value(l_new, key_size), value, size);
|
||||
} else if (map_flags & BPF_F_LOCK) {
|
||||
copy_map_value_locked(&htab->map,
|
||||
htab_elem_value(l_new, key_size),
|
||||
value, false);
|
||||
} else {
|
||||
copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value);
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ static void *htab_update_fn(void *arg)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void test_htab_reuse(void)
|
||||
static void test_htab_reuse_basic(void)
|
||||
{
|
||||
unsigned int i, wr_nr = 1, rd_nr = 4;
|
||||
pthread_t tids[wr_nr + rd_nr];
|
||||
@@ -99,3 +99,170 @@ void test_htab_reuse(void)
|
||||
}
|
||||
htab_reuse__destroy(skel);
|
||||
}
|
||||
|
||||
/*
 * Write-consistency test for BPF_F_LOCK updates.
 *
 * The race being exercised:
 *  1. Thread A: BPF_F_LOCK|BPF_EXIST update of an element.
 *  2. Thread B: deletes that element, then re-creates it with BPF_ANY.
 */
|
||||
|
||||
/*
 * Userspace mirror of the value type of the htab_lock_consistency map.
 * Field layout must stay in sync with the BPF-side struct htab_val_large
 * (spin_lock first, then seq, then the payload checked for torn writes).
 */
struct htab_val_large {
	struct bpf_spin_lock lock;
	__u32 seq;
	__u64 data[256];
};
|
||||
|
||||
/* State shared by all worker threads of the consistency test. */
struct consistency_ctx {
	int fd;			/* fd of the hash map under test */
	int start_fd;		/* read end of the start-signal pipe */
	int loop;		/* iterations each worker runs */
	/*
	 * Set (monotonically false -> true) by a lookup thread on the first
	 * torn read.  NOTE(review): volatile is not thread synchronization;
	 * the race on this flag looks benign, but atomic_bool would be the
	 * conventional choice — confirm against tree style.
	 */
	volatile bool torn_write;
};
|
||||
|
||||
/*
 * Block the calling thread until the main thread releases all workers by
 * closing the write end of the start pipe (read() then returns 0 at EOF).
 *
 * The original single read() ignored its result: a signal (EINTR) could
 * release a worker before the intended start, skewing the race window.
 * Retry on EINTR so workers really start together.
 */
static void wait_for_start(int fd)
{
	char buf;

	while (read(fd, &buf, 1) < 0 && errno == EINTR)
		;
}
|
||||
|
||||
static void *locked_update_fn(void *arg)
|
||||
{
|
||||
struct consistency_ctx *ctx = arg;
|
||||
struct htab_val_large value;
|
||||
unsigned int key = 1;
|
||||
int i;
|
||||
|
||||
memset(&value, 0xAA, sizeof(value));
|
||||
wait_for_start(ctx->start_fd);
|
||||
|
||||
for (i = 0; i < ctx->loop; i++) {
|
||||
value.seq = i;
|
||||
bpf_map_update_elem(ctx->fd, &key, &value,
|
||||
BPF_F_LOCK | BPF_EXIST);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Delete + update: removes the element then re-creates it with BPF_ANY. */
|
||||
static void *delete_update_fn(void *arg)
|
||||
{
|
||||
struct consistency_ctx *ctx = arg;
|
||||
struct htab_val_large value;
|
||||
unsigned int key = 1;
|
||||
int i;
|
||||
|
||||
memset(&value, 0xBB, sizeof(value));
|
||||
|
||||
wait_for_start(ctx->start_fd);
|
||||
|
||||
for (i = 0; i < ctx->loop; i++) {
|
||||
value.seq = i;
|
||||
bpf_map_delete_elem(ctx->fd, &key);
|
||||
bpf_map_update_elem(ctx->fd, &key, &value, BPF_ANY | BPF_F_LOCK);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *locked_lookup_fn(void *arg)
|
||||
{
|
||||
struct consistency_ctx *ctx = arg;
|
||||
struct htab_val_large value;
|
||||
unsigned int key = 1;
|
||||
int i, j;
|
||||
|
||||
wait_for_start(ctx->start_fd);
|
||||
|
||||
for (i = 0; i < ctx->loop && !ctx->torn_write; i++) {
|
||||
if (bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK))
|
||||
continue;
|
||||
|
||||
for (j = 0; j < 256; j++) {
|
||||
if (value.data[j] != value.data[0]) {
|
||||
ctx->torn_write = true;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void test_htab_reuse_consistency(void)
|
||||
{
|
||||
int threads_total = 6, threads = 2;
|
||||
pthread_t tids[threads_total];
|
||||
struct consistency_ctx ctx;
|
||||
struct htab_val_large seed;
|
||||
struct htab_reuse *skel;
|
||||
unsigned int key = 1, i;
|
||||
int pipefd[2];
|
||||
int err;
|
||||
|
||||
skel = htab_reuse__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
|
||||
return;
|
||||
|
||||
if (!ASSERT_OK(pipe(pipefd), "pipe"))
|
||||
goto out;
|
||||
|
||||
ctx.fd = bpf_map__fd(skel->maps.htab_lock_consistency);
|
||||
ctx.start_fd = pipefd[0];
|
||||
ctx.loop = 100000;
|
||||
ctx.torn_write = false;
|
||||
|
||||
/* Seed the element so locked updaters have something to find */
|
||||
memset(&seed, 0xBB, sizeof(seed));
|
||||
err = bpf_map_update_elem(ctx.fd, &key, &seed, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "seed_element"))
|
||||
goto close_pipe;
|
||||
|
||||
memset(tids, 0, sizeof(tids));
|
||||
for (i = 0; i < threads; i++) {
|
||||
err = pthread_create(&tids[i], NULL, locked_update_fn, &ctx);
|
||||
if (!ASSERT_OK(err, "pthread_create"))
|
||||
goto stop;
|
||||
}
|
||||
for (i = 0; i < threads; i++) {
|
||||
err = pthread_create(&tids[threads + i], NULL, delete_update_fn, &ctx);
|
||||
if (!ASSERT_OK(err, "pthread_create"))
|
||||
goto stop;
|
||||
}
|
||||
for (i = 0; i < threads; i++) {
|
||||
err = pthread_create(&tids[threads * 2 + i], NULL, locked_lookup_fn, &ctx);
|
||||
if (!ASSERT_OK(err, "pthread_create"))
|
||||
goto stop;
|
||||
}
|
||||
|
||||
/* Release all threads simultaneously */
|
||||
close(pipefd[1]);
|
||||
pipefd[1] = -1;
|
||||
|
||||
stop:
|
||||
for (i = 0; i < threads_total; i++) {
|
||||
if (!tids[i])
|
||||
continue;
|
||||
pthread_join(tids[i], NULL);
|
||||
}
|
||||
|
||||
ASSERT_FALSE(ctx.torn_write, "no torn writes detected");
|
||||
|
||||
close_pipe:
|
||||
if (pipefd[1] >= 0)
|
||||
close(pipefd[1]);
|
||||
close(pipefd[0]);
|
||||
out:
|
||||
htab_reuse__destroy(skel);
|
||||
}
|
||||
|
||||
/* Test entry point: registers and dispatches the htab_reuse subtests. */
void test_htab_reuse(void)
{
	if (test__start_subtest("basic"))
		test_htab_reuse_basic();
	if (test__start_subtest("consistency"))
		test_htab_reuse_consistency();
}
|
||||
|
||||
@@ -17,3 +17,19 @@ struct {
|
||||
__type(value, struct htab_val);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
} htab SEC(".maps");
|
||||
|
||||
/* Number of payload words checked for torn writes by the userspace test. */
#define HTAB_NDATA 256

/*
 * Value type for the htab_lock_consistency map: a spin_lock-protected
 * value large enough that a torn (unlocked) value copy is observable.
 * Must stay in sync with the userspace mirror of this struct.
 */
struct htab_val_large {
	struct bpf_spin_lock lock;
	__u32 seq;
	__u64 data[HTAB_NDATA];
};
|
||||
|
||||
/*
 * Hash map for the BPF_F_LOCK write-consistency test.  BPF_F_NO_PREALLOC
 * makes deleted elements eligible for immediate reuse by the allocator,
 * which is the reuse window the torn-write test exercises.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8);
	__type(key, unsigned int);
	__type(value, struct htab_val_large);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} htab_lock_consistency SEC(".maps");
|
||||
|
||||
Reference in New Issue
Block a user