mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 10:02:20 -04:00
Merge branch 'selftests/bpf: Fixes for map_percpu_stats test'
Hou Tao says: ==================== From: Hou Tao <houtao1@huawei.com> Hi, BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1]. It seems that the failure reason is per-cpu bpf memory allocator may not be able to allocate per-cpu pointer successfully and it can not refill free llist timely, and bpf_map_update_elem() will return -ENOMEM. Patch #1 fixes the size of value passed to per-cpu map update API. The problem was found when fixing the ENOMEM problem, so also post it in this patchset. Patch #2 & #3 mitigates the ENOMEM problem by retrying the update operation for non-preallocated per-cpu map. Please see individual patches for more details. And comments are always welcome. Regards, Tao [1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909 ==================== Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type)
|
||||
map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
|
||||
}
|
||||
|
||||
/* Return true iff @map_type stores a separate value per CPU. */
static bool is_percpu(__u32 map_type)
{
	switch (map_type) {
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
/* Per-test options handed to each updater thread (patch_map_thread). */
struct upsert_opts {
	__u32 map_type;		/* BPF_MAP_TYPE_* of the map under test */
	int map_fd;		/* fd of the map being updated */
	__u32 n;		/* number of keys to upsert: [0, n) */
	/* Retry the update on ENOMEM; set for non-preallocated per-cpu maps,
	 * whose bpf memory allocator may transiently fail to refill.
	 */
	bool retry_for_nomem;
};
|
||||
|
||||
static int create_small_hash(void)
|
||||
@@ -148,19 +155,38 @@ static int create_small_hash(void)
|
||||
return map_fd;
|
||||
}
|
||||
|
||||
/* Retry predicate for map updates: only ENOMEM is treated as transient. */
static bool retry_for_nomem_fn(int err)
{
	if (err == ENOMEM)
		return true;

	return false;
}
|
||||
|
||||
static void *patch_map_thread(void *arg)
|
||||
{
|
||||
/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
|
||||
static __u8 blob[8 << 10];
|
||||
struct upsert_opts *opts = arg;
|
||||
void *val_ptr;
|
||||
int val;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < opts->n; i++) {
|
||||
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
|
||||
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
|
||||
val = create_small_hash();
|
||||
else
|
||||
val_ptr = &val;
|
||||
} else if (is_percpu(opts->map_type)) {
|
||||
val_ptr = blob;
|
||||
} else {
|
||||
val = rand();
|
||||
ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
|
||||
val_ptr = &val;
|
||||
}
|
||||
|
||||
/* 2 seconds may be enough ? */
|
||||
if (opts->retry_for_nomem)
|
||||
ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
|
||||
40, retry_for_nomem_fn);
|
||||
else
|
||||
ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
|
||||
CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
|
||||
|
||||
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
|
||||
@@ -281,6 +307,13 @@ static void __test(int map_fd)
|
||||
else
|
||||
opts.n /= 2;
|
||||
|
||||
/* per-cpu bpf memory allocator may not be able to allocate per-cpu
|
||||
* pointer successfully and it can not refill free llist timely, and
|
||||
* bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
|
||||
* the problem temporarily.
|
||||
*/
|
||||
opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
|
||||
|
||||
/*
|
||||
* Upsert keys [0, n) under some competition: with random values from
|
||||
* N_THREADS threads. Check values, then delete all elements and check
|
||||
|
||||
@@ -1396,13 +1396,18 @@ static void test_map_stress(void)
|
||||
#define MAX_DELAY_US 50000
|
||||
#define MIN_DELAY_RANGE_US 5000
|
||||
|
||||
static int map_update_retriable(int map_fd, const void *key, const void *value,
|
||||
int flags, int attempts)
|
||||
/* Retry predicate for map updates: EAGAIN and EBUSY are transient. */
static bool retry_for_again_or_busy(int err)
{
	switch (err) {
	case EAGAIN:
	case EBUSY:
		return true;
	default:
		return false;
	}
}
|
||||
|
||||
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
|
||||
retry_for_error_fn need_retry)
|
||||
{
|
||||
int delay = rand() % MIN_DELAY_RANGE_US;
|
||||
|
||||
while (bpf_map_update_elem(map_fd, key, value, flags)) {
|
||||
if (!attempts || (errno != EAGAIN && errno != EBUSY))
|
||||
if (!attempts || !need_retry(errno))
|
||||
return -errno;
|
||||
|
||||
if (delay <= MAX_DELAY_US / 2)
|
||||
@@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data)
|
||||
key = value = i;
|
||||
|
||||
if (do_update) {
|
||||
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
|
||||
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
|
||||
retry_for_again_or_busy);
|
||||
if (err)
|
||||
printf("error %d %d\n", err, errno);
|
||||
assert(err == 0);
|
||||
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
|
||||
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
|
||||
retry_for_again_or_busy);
|
||||
if (err)
|
||||
printf("error %d %d\n", err, errno);
|
||||
assert(err == 0);
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define CHECK(condition, tag, format...) ({ \
|
||||
int __ret = !!(condition); \
|
||||
@@ -16,4 +17,8 @@
|
||||
|
||||
extern int skips;
|
||||
|
||||
typedef bool (*retry_for_error_fn)(int err);
|
||||
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
|
||||
retry_for_error_fn need_retry);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user