mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-19 04:42:35 -04:00
Merge branch 'memcg-accounting-for-bpf-arena'
Puranjay Mohan says:

====================
memcg accounting for BPF arena

v4: https://lore.kernel.org/all/20260102181333.3033679-1-puranjay@kernel.org/
Changes in v4->v5:
- Remove unused variables from bpf_map_alloc_pages() (CI)

v3: https://lore.kernel.org/all/20260102151852.570285-1-puranjay@kernel.org/
Changes in v3->v4:
- Do memcg set/recover in arena_reserve_pages() rather than
  bpf_arena_reserve_pages() for symmetry with other kfuncs (Alexei)

v2: https://lore.kernel.org/all/20251231141434.3416822-1-puranjay@kernel.org/
Changes in v2->v3:
- Remove memcg accounting from bpf_map_alloc_pages() as the caller does
  it already. (Alexei)
- Do memcg set/recover in arena_alloc/free_pages() rather than
  bpf_arena_alloc/free_pages(), it reduces copy pasting in
  sleepable/non_sleepable functions.

v1: https://lore.kernel.org/all/20251230153006.1347742-1-puranjay@kernel.org/
Changes in v1->v2:
- Return both pointers through arguments from bpf_map_memcg_enter and
  make it return void. (Alexei)
- Add memcg accounting in arena_free_worker (AI)

This set adds memcg accounting logic into arena kfuncs and other places
that do allocations in arena.c.
====================

Link: https://patch.msgid.link/20260102200230.25168-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -2608,6 +2608,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
|
||||
int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
|
||||
unsigned long nr_pages, struct page **page_array);
|
||||
#ifdef CONFIG_MEMCG
|
||||
void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
|
||||
struct mem_cgroup **new_memcg);
|
||||
void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
|
||||
struct mem_cgroup *memcg);
|
||||
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
|
||||
int node);
|
||||
void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
|
||||
@@ -2632,6 +2636,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
|
||||
kvcalloc(_n, _size, _flags)
|
||||
#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
|
||||
__alloc_percpu_gfp(_size, _align, _flags)
|
||||
/*
 * No-op variant of bpf_map_memcg_enter(): @map is not used and NULL is
 * stored through both output pointers, so a caller always has initialized
 * values to hand to bpf_map_memcg_exit() afterwards.
 *
 * NOTE(review): presumably the !CONFIG_MEMCG fallback (it sits just before
 * the #endif of an #ifdef CONFIG_MEMCG section) - confirm against the full
 * header.
 */
static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
|
||||
struct mem_cgroup **new_memcg)
|
||||
{
|
||||
*new_memcg = NULL;
|
||||
*old_memcg = NULL;
|
||||
}
|
||||
|
||||
/*
 * No-op variant of bpf_map_memcg_exit(): the body is empty and both
 * arguments are ignored.
 *
 * NOTE(review): presumably the !CONFIG_MEMCG fallback - confirm against the
 * full header.
 */
static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
|
||||
@@ -360,6 +360,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct bpf_map *map = vmf->vma->vm_file->private_data;
|
||||
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
|
||||
struct mem_cgroup *new_memcg, *old_memcg;
|
||||
struct page *page;
|
||||
long kbase, kaddr;
|
||||
unsigned long flags;
|
||||
@@ -377,6 +378,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
|
||||
/* already have a page vmap-ed */
|
||||
goto out;
|
||||
|
||||
bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
|
||||
|
||||
if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
|
||||
/* User space requested to segfault when page is not allocated by bpf prog */
|
||||
goto out_unlock_sigsegv;
|
||||
@@ -400,12 +403,14 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
|
||||
goto out_unlock_sigsegv;
|
||||
}
|
||||
flush_vmap_cache(kaddr, PAGE_SIZE);
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
out:
|
||||
page_ref_add(page, 1);
|
||||
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
|
||||
vmf->page = page;
|
||||
return 0;
|
||||
out_unlock_sigsegv:
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
|
||||
return VM_FAULT_SIGSEGV;
|
||||
}
|
||||
@@ -534,6 +539,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
/* user_vm_end/start are fixed before bpf prog runs */
|
||||
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
|
||||
u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
|
||||
struct mem_cgroup *new_memcg, *old_memcg;
|
||||
struct apply_range_data data;
|
||||
struct page **pages = NULL;
|
||||
long remaining, mapped = 0;
|
||||
@@ -555,11 +561,14 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
|
||||
/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
|
||||
alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
|
||||
pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
|
||||
if (!pages)
|
||||
pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
|
||||
if (!pages) {
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
return 0;
|
||||
}
|
||||
data.pages = pages;
|
||||
|
||||
if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
|
||||
@@ -617,6 +626,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
|
||||
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
|
||||
kfree_nolock(pages);
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
return clear_lo32(arena->user_vm_start) + uaddr32;
|
||||
out:
|
||||
range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
|
||||
@@ -630,6 +640,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
|
||||
out_free_pages:
|
||||
kfree_nolock(pages);
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -651,6 +662,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
|
||||
|
||||
static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
|
||||
{
|
||||
struct mem_cgroup *new_memcg, *old_memcg;
|
||||
u64 full_uaddr, uaddr_end;
|
||||
long kaddr, pgoff;
|
||||
struct page *page;
|
||||
@@ -671,6 +683,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
|
||||
|
||||
page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
|
||||
pgoff = compute_pgoff(arena, uaddr);
|
||||
bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
|
||||
|
||||
if (!sleepable)
|
||||
goto defer;
|
||||
@@ -709,11 +722,13 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
|
||||
zap_pages(arena, full_uaddr, 1);
|
||||
__free_page(page);
|
||||
}
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
|
||||
return;
|
||||
|
||||
defer:
|
||||
s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
|
||||
s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
if (!s)
|
||||
/*
|
||||
* If allocation fails in non-sleepable context, pages are intentionally left
|
||||
@@ -735,6 +750,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
|
||||
static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
|
||||
{
|
||||
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
|
||||
struct mem_cgroup *new_memcg, *old_memcg;
|
||||
unsigned long flags;
|
||||
long pgoff;
|
||||
int ret;
|
||||
@@ -757,7 +773,9 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
|
||||
}
|
||||
|
||||
/* "Allocate" the region to prevent it from being allocated. */
|
||||
bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
|
||||
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
out:
|
||||
raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
|
||||
return ret;
|
||||
@@ -766,6 +784,7 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
|
||||
static void arena_free_worker(struct work_struct *work)
|
||||
{
|
||||
struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work);
|
||||
struct mem_cgroup *new_memcg, *old_memcg;
|
||||
struct llist_node *list, *pos, *t;
|
||||
struct arena_free_span *s;
|
||||
u64 arena_vm_start, user_vm_start;
|
||||
@@ -780,6 +799,8 @@ static void arena_free_worker(struct work_struct *work)
|
||||
return;
|
||||
}
|
||||
|
||||
bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
|
||||
|
||||
init_llist_head(&free_pages);
|
||||
arena_vm_start = bpf_arena_get_kern_vm_start(arena);
|
||||
user_vm_start = bpf_arena_get_user_vm_start(arena);
|
||||
@@ -820,6 +841,8 @@ static void arena_free_worker(struct work_struct *work)
|
||||
page = llist_entry(pos, struct page, pcp_llist);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
bpf_map_memcg_exit(old_memcg, new_memcg);
|
||||
}
|
||||
|
||||
static void arena_free_irq(struct irq_work *iw)
|
||||
|
||||
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
|
||||
range_it_insert(rn, rt);
|
||||
|
||||
/* Add a range */
|
||||
new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
|
||||
new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
|
||||
NUMA_NO_NODE);
|
||||
if (!new_rn)
|
||||
return -ENOMEM;
|
||||
new_rn->rn_start = last + 1;
|
||||
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
|
||||
right->rn_start = start;
|
||||
range_it_insert(right, rt);
|
||||
} else {
|
||||
left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
|
||||
left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
|
||||
if (!left)
|
||||
return -ENOMEM;
|
||||
left->rn_start = start;
|
||||
|
||||
@@ -505,17 +505,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
|
||||
return root_mem_cgroup;
|
||||
}
|
||||
|
||||
/*
 * bpf_map_memcg_enter() - make the memcg obtained for @map the active one.
 * @map:       map handed to bpf_map_get_memcg()
 * @old_memcg: out; receives the value returned by set_active_memcg(), to be
 *             passed back to bpf_map_memcg_exit()
 * @new_memcg: out; receives the memcg returned by bpf_map_get_memcg()
 *
 * Intended to be followed by bpf_map_memcg_exit() with the two values
 * stored here.
 * NOTE(review): presumably bpf_map_get_memcg() returns a reference that the
 * mem_cgroup_put() in bpf_map_memcg_exit() drops - confirm.
 */
void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
|
||||
struct mem_cgroup **new_memcg)
|
||||
{
|
||||
*new_memcg = bpf_map_get_memcg(map);
|
||||
/* Keep whatever set_active_memcg() returns so the caller can restore it. */
*old_memcg = set_active_memcg(*new_memcg);
|
||||
}
|
||||
|
||||
/*
 * bpf_map_memcg_exit() - counterpart of bpf_map_memcg_enter().
 * @old_memcg: value stored in *old_memcg by bpf_map_memcg_enter(); it is
 *             passed to set_active_memcg()
 * @new_memcg: value stored in *new_memcg by bpf_map_memcg_enter(); it is
 *             passed to mem_cgroup_put()
 */
void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
|
||||
struct mem_cgroup *new_memcg)
|
||||
{
|
||||
/* Switch the active memcg back first, then release @new_memcg. */
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(new_memcg);
|
||||
}
|
||||
|
||||
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
|
||||
int node)
|
||||
{
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
void *ptr;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
bpf_map_memcg_enter(map, &old_memcg, &memcg);
|
||||
ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
bpf_map_memcg_exit(old_memcg, memcg);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -526,11 +538,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
void *ptr;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
bpf_map_memcg_enter(map, &old_memcg, &memcg);
|
||||
ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
bpf_map_memcg_exit(old_memcg, memcg);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -540,11 +550,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
void *ptr;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
bpf_map_memcg_enter(map, &old_memcg, &memcg);
|
||||
ptr = kzalloc(size, flags | __GFP_ACCOUNT);
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
bpf_map_memcg_exit(old_memcg, memcg);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -555,11 +563,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
void *ptr;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
bpf_map_memcg_enter(map, &old_memcg, &memcg);
|
||||
ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
bpf_map_memcg_exit(old_memcg, memcg);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -570,11 +576,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
void __percpu *ptr;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
bpf_map_memcg_enter(map, &old_memcg, &memcg);
|
||||
ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
bpf_map_memcg_exit(old_memcg, memcg);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -612,12 +616,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
|
||||
unsigned long i, j;
|
||||
struct page *pg;
|
||||
int ret = 0;
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
#endif
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
pg = __bpf_alloc_page(nid);
|
||||
|
||||
@@ -631,10 +630,6 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user