diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 88845aadc47d..18b592fde896 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -187,6 +187,7 @@ enum btf_field_type { BPF_RB_NODE = (1 << 7), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, + BPF_REFCOUNT = (1 << 8), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -210,6 +211,7 @@ struct btf_field_graph_root { struct btf_field { u32 offset; + u32 size; enum btf_field_type type; union { struct btf_field_kptr kptr; @@ -222,15 +224,10 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int refcount_off; struct btf_field fields[]; }; -struct btf_field_offs { - u32 cnt; - u32 field_off[BTF_FIELDS_MAX]; - u8 field_sz[BTF_FIELDS_MAX]; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -257,7 +254,6 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. 
*/ @@ -299,6 +295,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_rb_root"; case BPF_RB_NODE: return "bpf_rb_node"; + case BPF_REFCOUNT: + return "bpf_refcount"; default: WARN_ON_ONCE(1); return "unknown"; @@ -323,6 +321,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_rb_root); case BPF_RB_NODE: return sizeof(struct bpf_rb_node); + case BPF_REFCOUNT: + return sizeof(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; @@ -347,12 +347,42 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_rb_root); case BPF_RB_NODE: return __alignof__(struct bpf_rb_node); + case BPF_REFCOUNT: + return __alignof__(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; } } +static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) +{ + memset(addr, 0, field->size); + + switch (field->type) { + case BPF_REFCOUNT: + refcount_set((refcount_t *)addr, 1); + break; + case BPF_RB_NODE: + RB_CLEAR_NODE((struct rb_node *)addr); + break; + case BPF_LIST_HEAD: + case BPF_LIST_NODE: + INIT_LIST_HEAD((struct list_head *)addr); + break; + case BPF_RB_ROOT: + /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ + case BPF_SPIN_LOCK: + case BPF_TIMER: + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + break; + default: + WARN_ON_ONCE(1); + return; + } +} + static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { if (IS_ERR_OR_NULL(rec)) @@ -360,14 +390,14 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) +static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; - if (!foffs) + if (IS_ERR_OR_NULL(rec)) return; - for (i = 0; i < foffs->cnt; i++) - memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); + for (i = 0; i < 
rec->cnt; i++) + bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset); } /* 'dst' must be a temporary buffer and should not point to memory that is being @@ -379,7 +409,7 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - bpf_obj_init(map->field_offs, dst); + bpf_obj_init(map->record, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -399,14 +429,14 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ -static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, +static inline void bpf_obj_memcpy(struct btf_record *rec, void *dst, void *src, u32 size, bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(size, 8)); else @@ -414,49 +444,49 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); + bpf_obj_memcpy(map->record, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); + bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } -static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) +static inline 
void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { memset(dst, 0, size); return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memset(dst + curr_off, 0, size - curr_off); } static inline void zero_map_value(struct bpf_map *map, void *dst) { - bpf_obj_memzero(map->field_offs, dst, map->value_size); + bpf_obj_memzero(map->record, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f03852b89d28..3dd29a53b711 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -464,7 +464,12 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; - u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + union { + /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + u64 obj_new_size; + /* remember the offset of node field within type to rewrite */ + u64 insert_off; + }; struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 495250162422..813227bff58a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -113,7 +113,6 @@ struct btf_id_dtor_kfunc { struct btf_struct_meta { u32 btf_id; struct btf_record *record; - struct btf_field_offs *field_offs; }; struct btf_struct_metas { @@ -207,7 +206,6 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type 
*t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3823100b7934..4b20a7269bee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6985,6 +6985,10 @@ struct bpf_rb_node { __u64 :64; } __attribute__((aligned(8))); +struct bpf_refcount { + __u32 :32; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2c2d1fb9f410..027f9f8a3551 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1666,10 +1666,8 @@ static void btf_struct_metas_free(struct btf_struct_metas *tab) if (!tab) return; - for (i = 0; i < tab->cnt; i++) { + for (i = 0; i < tab->cnt; i++) btf_record_free(tab->types[i].record); - kfree(tab->types[i].field_offs); - } kfree(tab); } @@ -3393,6 +3391,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); field_mask_test_name(BPF_RB_NODE, "bpf_rb_node"); + field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); /* Only return BPF_KPTR when all other types with matchable names fail */ if (field_mask & BPF_KPTR) { @@ -3441,6 +3440,7 @@ static int btf_find_struct_field(const struct btf *btf, case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: ret = btf_find_struct(btf, member_type, off, sz, field_type, idx < info_cnt ? 
&info[idx] : &tmp); if (ret < 0) @@ -3506,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, case BPF_TIMER: case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: ret = btf_find_struct(btf, var_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3700,12 +3701,24 @@ static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field, __alignof__(struct bpf_rb_node)); } +static int btf_field_cmp(const void *_a, const void *_b, const void *priv) +{ + const struct btf_field *a = (const struct btf_field *)_a; + const struct btf_field *b = (const struct btf_field *)_b; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + return 0; +} + struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; + u32 next_off = 0, field_type_size; struct btf_record *rec; - u32 next_off = 0; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); @@ -3724,8 +3737,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; + rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) { - if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) { + field_type_size = btf_field_type_size(info_arr[i].type); + if (info_arr[i].off + field_type_size > value_size) { WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); ret = -EFAULT; goto end; @@ -3734,11 +3749,12 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type ret = -EEXIST; goto end; } - next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); + next_off = info_arr[i].off + field_type_size; rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; rec->fields[i].type = info_arr[i].type; + 
rec->fields[i].size = field_type_size; switch (info_arr[i].type) { case BPF_SPIN_LOCK: @@ -3751,6 +3767,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->timer_off = rec->fields[i].offset; break; + case BPF_REFCOUNT: + WARN_ON_ONCE(rec->refcount_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->refcount_off = rec->fields[i].offset; + break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); @@ -3784,30 +3805,16 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type goto end; } - /* need collection identity for non-owning refs before allowing this - * - * Consider a node type w/ both list and rb_node fields: - * struct node { - * struct bpf_list_node l; - * struct bpf_rb_node r; - * } - * - * Used like so: - * struct node *n = bpf_obj_new(....); - * bpf_list_push_front(&list_head, &n->l); - * bpf_rbtree_remove(&rb_root, &n->r); - * - * It should not be possible to rbtree_remove the node since it hasn't - * been added to a tree. But push_front converts n to a non-owning - * reference, and rbtree_remove accepts the non-owning reference to - * a type w/ bpf_rb_node field. 
- */ - if (btf_record_has_field(rec, BPF_LIST_NODE) && + if (rec->refcount_off < 0 && + btf_record_has_field(rec, BPF_LIST_NODE) && btf_record_has_field(rec, BPF_RB_NODE)) { ret = -EINVAL; goto end; } + sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp, + NULL, rec); + return rec; end: btf_record_free(rec); @@ -3889,61 +3896,6 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) return 0; } -static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv) -{ - const u32 a = *(const u32 *)_a; - const u32 b = *(const u32 *)_b; - - if (a < b) - return -1; - else if (a > b) - return 1; - return 0; -} - -static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv) -{ - struct btf_field_offs *foffs = (void *)priv; - u32 *off_base = foffs->field_off; - u32 *a = _a, *b = _b; - u8 *sz_a, *sz_b; - - sz_a = foffs->field_sz + (a - off_base); - sz_b = foffs->field_sz + (b - off_base); - - swap(*a, *b); - swap(*sz_a, *sz_b); -} - -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec) -{ - struct btf_field_offs *foffs; - u32 i, *off; - u8 *sz; - - BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz)); - if (IS_ERR_OR_NULL(rec)) - return NULL; - - foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN); - if (!foffs) - return ERR_PTR(-ENOMEM); - - off = foffs->field_off; - sz = foffs->field_sz; - for (i = 0; i < rec->cnt; i++) { - off[i] = rec->fields[i].offset; - sz[i] = btf_field_type_size(rec->fields[i].type); - } - foffs->cnt = rec->cnt; - - if (foffs->cnt == 1) - return foffs; - sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]), - btf_field_offs_cmp, btf_field_offs_swap, foffs); - return foffs; -} - static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) @@ -5348,6 +5300,7 @@ static const char *alloc_obj_fields[] = { "bpf_list_node", "bpf_rb_root", 
"bpf_rb_node", + "bpf_refcount", }; static struct btf_struct_metas * @@ -5386,7 +5339,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) for (i = 1; i < n; i++) { struct btf_struct_metas *new_tab; const struct btf_member *member; - struct btf_field_offs *foffs; struct btf_struct_meta *type; struct btf_record *record; const struct btf_type *t; @@ -5422,23 +5374,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) type = &tab->types[tab->cnt]; type->btf_id = i; record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | - BPF_RB_ROOT | BPF_RB_NODE, t->size); + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size); /* The record cannot be unset, treat it as an error if so */ if (IS_ERR_OR_NULL(record)) { ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; goto free; } - foffs = btf_parse_field_offs(record); - /* We need the field_offs to be valid for a valid record, - * either both should be set or both should be unset. - */ - if (IS_ERR_OR_NULL(foffs)) { - btf_record_free(record); - ret = -EFAULT; - goto free; - } type->record = record; - type->field_offs = foffs; tab->cnt++; } return tab; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f04e60a4847f..00e5fb0682ac 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1798,6 +1798,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); + void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) { @@ -1828,13 +1830,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head, /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - bpf_obj_free_fields(field->graph_root.value_rec, obj); - /* bpf_mem_free requires migrate_disable(), since we can be - * called from map free path as well apart from BPF program (as - * part of map ops doing bpf_obj_free_fields). 
- */ migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1871,10 +1868,9 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, obj = pos; obj -= field->graph_root.node_offset; - bpf_obj_free_fields(field->graph_root.value_rec, obj); migrate_disable(); - bpf_mem_free(&bpf_global_ma, obj); + __bpf_obj_drop_impl(obj, field->graph_root.value_rec); migrate_enable(); } } @@ -1893,12 +1889,21 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) if (!p) return NULL; if (meta) - bpf_obj_init(meta->field_offs, p); + bpf_obj_init(meta->record, p); return p; } +/* Must be called under migrate_disable(), as required by bpf_mem_free */ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) { + if (rec && rec->refcount_off >= 0 && + !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { + /* Object is refcounted and refcount_dec didn't result in 0 + * refcount. Return without freeing the object + */ + return; + } + if (rec) bpf_obj_free_fields(rec, p); bpf_mem_free(&bpf_global_ma, p); @@ -1912,31 +1917,68 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) __bpf_obj_drop_impl(p, meta ? 
meta->record : NULL); } -static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) +__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + struct bpf_refcount *ref; + + /* Could just cast directly to refcount_t *, but need some code using + * bpf_refcount type so that it is emitted in vmlinux BTF + */ + ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); + + refcount_inc((refcount_t *)ref); + return (void *)p__refcounted_kptr; +} + +static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, + bool tail, struct btf_record *rec, u64 off) { struct list_head *n = (void *)node, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); - if (unlikely(!n->next)) - INIT_LIST_HEAD(n); + if (!list_empty(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl((void *)n - off, rec); + return -EINVAL; + } + tail ? list_add_tail(n, h) : list_add(n, h); + + return 0; } -__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, false); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, false, + meta ? meta->record : NULL, off); } -__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta__ign, u64 off) { - return __bpf_list_add(node, head, true); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_list_add(node, head, true, + meta ? 
meta->record : NULL, off); } static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) { struct list_head *n, *h = (void *)head; + /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't + * called on its fields, so init here + */ if (unlikely(!h->next)) INIT_LIST_HEAD(h); if (list_empty(h)) @@ -1962,6 +2004,9 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, struct rb_root_cached *r = (struct rb_root_cached *)root; struct rb_node *n = (struct rb_node *)node; + if (RB_EMPTY_NODE(n)) + return NULL; + rb_erase_cached(n, r); RB_CLEAR_NODE(n); return (struct bpf_rb_node *)n; @@ -1970,14 +2015,20 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF * program */ -static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - void *less) +static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + void *less, struct btf_record *rec, u64 off) { struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node; + struct rb_node *parent = NULL, *n = (struct rb_node *)node; bpf_callback_t cb = (bpf_callback_t)less; - struct rb_node *parent = NULL; bool leftmost = true; + if (!RB_EMPTY_NODE(n)) { + /* Only called from BPF prog, no need to migrate_disable */ + __bpf_obj_drop_impl((void *)n - off, rec); + return -EINVAL; + } + while (*link) { parent = *link; if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) { @@ -1988,15 +2039,18 @@ static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, } } - rb_link_node((struct rb_node *)node, parent, link); - rb_insert_color_cached((struct rb_node *)node, - (struct rb_root_cached *)root, leftmost); + rb_link_node(n, parent, link); + rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost); + return 0; } -__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - bool 
(less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) +__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, + bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b), + void *meta__ign, u64 off) { - __bpf_rbtree_add(root, node, (void *)less); + struct btf_struct_meta *meta = meta__ign; + + return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off); } __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) @@ -2271,14 +2325,15 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_list_push_front) -BTF_ID_FLAGS(func, bpf_list_push_back) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_list_push_front_impl) +BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE) -BTF_ID_FLAGS(func, bpf_rbtree_add) +BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) #ifdef CONFIG_CGROUPS diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 38136ec4e095..2c5c64c2a53b 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -56,18 +56,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) ret = PTR_ERR(inner_map_meta->record); goto free; } - if (inner_map_meta->record) { - struct btf_field_offs *field_offs; - /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always - * valid. 
- */ - field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN); - if (!field_offs) { - ret = -ENOMEM; - goto free_rec; - } - inner_map_meta->field_offs = field_offs; - } /* Note: We must use the same BTF, as we also used btf_record_dup above * which relies on BTF being same for both maps, as some members like * record->fields.list_head have pointers like value_rec pointing into @@ -88,8 +76,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) fdput(f); return inner_map_meta; -free_rec: - btf_record_free(inner_map_meta->record); free: kfree(inner_map_meta); put: @@ -99,7 +85,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) void bpf_map_meta_free(struct bpf_map *map_meta) { - kfree(map_meta->field_offs); bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6d575505f89c..28eac7434d32 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -552,6 +552,7 @@ void btf_record_free(struct btf_record *rec) case BPF_RB_NODE: case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_REFCOUNT: /* Nothing to release */ break; default: @@ -599,6 +600,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_RB_NODE: case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_REFCOUNT: /* Nothing to acquire */ break; default: @@ -705,6 +707,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) break; case BPF_LIST_NODE: case BPF_RB_NODE: + case BPF_REFCOUNT: break; default: WARN_ON_ONCE(1); @@ -717,14 +720,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - struct btf_field_offs *foffs = map->field_offs; struct btf_record *rec = map->record; security_bpf_map_free(map); bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); - /* Delay 
freeing of field_offs and btf_record for maps, as map_free + /* Delay freeing of btf_record for maps, as map_free * callback usually needs access to them. It is better to do it here * than require each callback to do the free itself manually. * @@ -733,7 +735,6 @@ static void bpf_map_free_deferred(struct work_struct *work) * eventually calls bpf_map_free_meta, since inner_map_meta is only a * template bpf_map struct used during verification. */ - kfree(foffs); btf_record_free(rec); } @@ -1034,7 +1035,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT, + BPF_RB_ROOT | BPF_REFCOUNT, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -1073,6 +1074,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_REFCOUNT: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && @@ -1125,7 +1127,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); - struct btf_field_offs *foffs; struct bpf_map *map; int f_flags; int err; @@ -1205,17 +1206,9 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - - foffs = btf_parse_field_offs(map->record); - if (IS_ERR(foffs)) { - err = PTR_ERR(foffs); - goto free_map; - } - map->field_offs = foffs; - err = security_bpf_map_alloc(map); if (err) - goto free_map_field_offs; + goto free_map; err = bpf_map_alloc_id(map); if (err) @@ -1239,8 +1232,6 @@ static int map_create(union bpf_attr *attr) free_map_sec: security_bpf_map_free(map); -free_map_field_offs: - kfree(map->field_offs); free_map: btf_put(map->btf); map->ops->map_free(map); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 
4aa6d715e655..6a41b69a424e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -273,6 +273,11 @@ struct bpf_call_arg_meta { struct btf_field *kptr_field; }; +struct btf_and_id { + struct btf *btf; + u32 btf_id; +}; + struct bpf_kfunc_call_arg_meta { /* In parameters */ struct btf *btf; @@ -291,10 +296,10 @@ struct bpf_kfunc_call_arg_meta { u64 value; bool found; } arg_constant; - struct { - struct btf *btf; - u32 btf_id; - } arg_obj_drop; + union { + struct btf_and_id arg_obj_drop; + struct btf_and_id arg_refcount_acquire; + }; struct { struct btf_field *field; } arg_list_head; @@ -8495,10 +8500,10 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *callee, int insn_idx) { - /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, + /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)); * - * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset + * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset * that 'less' callback args will be receiving. 
However, 'node' arg was release_reference'd * by this point, so look at 'root' */ @@ -9403,6 +9408,11 @@ static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *a return __kfunc_param_match_suffix(btf, arg, "__uninit"); } +static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -9542,15 +9552,16 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_CTX, - KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ - KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ + KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ + KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */ + KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ KF_ARG_PTR_TO_DYNPTR, KF_ARG_PTR_TO_ITER, KF_ARG_PTR_TO_LIST_HEAD, KF_ARG_PTR_TO_LIST_NODE, - KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ + KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ KF_ARG_PTR_TO_MEM, - KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ + KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ KF_ARG_PTR_TO_CALLBACK, KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, @@ -9559,8 +9570,9 @@ enum kfunc_ptr_arg_type { enum special_kfunc_type { KF_bpf_obj_new_impl, KF_bpf_obj_drop_impl, - KF_bpf_list_push_front, - KF_bpf_list_push_back, + KF_bpf_refcount_acquire_impl, + KF_bpf_list_push_front_impl, + KF_bpf_list_push_back_impl, KF_bpf_list_pop_front, KF_bpf_list_pop_back, KF_bpf_cast_to_kern_ctx, @@ -9568,7 +9580,7 @@ enum special_kfunc_type { KF_bpf_rcu_read_lock, KF_bpf_rcu_read_unlock, KF_bpf_rbtree_remove, - KF_bpf_rbtree_add, + KF_bpf_rbtree_add_impl, KF_bpf_rbtree_first, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, @@ -9579,14 +9591,15 @@ enum special_kfunc_type { 
BTF_SET_START(special_kfunc_set) BTF_ID(func, bpf_obj_new_impl) BTF_ID(func, bpf_obj_drop_impl) -BTF_ID(func, bpf_list_push_front) -BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_refcount_acquire_impl) +BTF_ID(func, bpf_list_push_front_impl) +BTF_ID(func, bpf_list_push_back_impl) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) BTF_ID(func, bpf_cast_to_kern_ctx) BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rbtree_remove) -BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -9597,8 +9610,9 @@ BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) BTF_ID(func, bpf_obj_new_impl) BTF_ID(func, bpf_obj_drop_impl) -BTF_ID(func, bpf_list_push_front) -BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_refcount_acquire_impl) +BTF_ID(func, bpf_list_push_front_impl) +BTF_ID(func, bpf_list_push_back_impl) BTF_ID(func, bpf_list_pop_front) BTF_ID(func, bpf_list_pop_back) BTF_ID(func, bpf_cast_to_kern_ctx) @@ -9606,7 +9620,7 @@ BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rcu_read_lock) BTF_ID(func, bpf_rcu_read_unlock) BTF_ID(func, bpf_rbtree_remove) -BTF_ID(func, bpf_rbtree_add) +BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -9649,6 +9663,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) return KF_ARG_PTR_TO_ALLOC_BTF_ID; + if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_REFCOUNTED_KPTR; + if (is_kfunc_arg_kptr_get(meta, argno)) { if (!btf_type_is_ptr(ref_t)) { verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n"); @@ -9937,27 +9954,28 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ static bool is_bpf_list_api_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_list_push_front] || - 
btf_id == special_kfunc_list[KF_bpf_list_push_back] || + return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] || btf_id == special_kfunc_list[KF_bpf_list_pop_front] || btf_id == special_kfunc_list[KF_bpf_list_pop_back]; } static bool is_bpf_rbtree_api_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_rbtree_add] || + return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || btf_id == special_kfunc_list[KF_bpf_rbtree_first]; } static bool is_bpf_graph_api_kfunc(u32 btf_id) { - return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id); + return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) || + btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } static bool is_callback_calling_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_rbtree_add]; + return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } static bool is_rbtree_lock_required_kfunc(u32 btf_id) @@ -9998,12 +10016,12 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, switch (node_field_type) { case BPF_LIST_NODE: - ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] || - kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]); + ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]); break; case BPF_RB_NODE: ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || - kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]); + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]); break; default: verbose(env, "verifier internal error: unexpected graph node argument type %s\n", @@ -10171,6 +10189,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ const char *func_name = meta->func_name, *ref_tname; const struct btf *btf = meta->btf; 
const struct btf_param *args; + struct btf_record *rec; u32 i, nargs; int ret; @@ -10306,6 +10325,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: /* Trusted by default */ break; default: @@ -10523,6 +10543,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CALLBACK: meta->subprogno = reg->subprogno; break; + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) { + verbose(env, "arg#%d is neither owning or non-owning ref\n", i); + return -EINVAL; + } + + rec = reg_btf_record(reg); + if (!rec) { + verbose(env, "verifier internal error: Couldn't find btf_record\n"); + return -EFAULT; + } + + if (rec->refcount_off < 0) { + verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); + return -EINVAL; + } + + meta->arg_refcount_acquire.btf = reg->btf; + meta->arg_refcount_acquire.btf_id = reg->btf_id; + break; } } @@ -10662,10 +10702,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] || - meta.func_id == special_kfunc_list[KF_bpf_list_push_back] || - meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) { + if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || + meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; + insn_aux->insert_off = regs[BPF_REG_2].off; err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", @@ -10681,7 +10722,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == 
special_kfunc_list[KF_bpf_rbtree_add]) { + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_rbtree_add_callback_state); if (err) { @@ -10699,7 +10740,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { /* Only exception is bpf_obj_new_impl */ - if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) { + if (meta.btf != btf_vmlinux || + (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] && + meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) { verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); return -EINVAL; } @@ -10747,6 +10790,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_aux->obj_new_size = ret_t->size; insn_aux->kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = meta.arg_refcount_acquire.btf; + regs[BPF_REG_0].btf_id = meta.arg_refcount_acquire.btf_id; + + insn_aux->kptr_struct_meta = + btf_find_struct_meta(meta.arg_refcount_acquire.btf, + meta.arg_refcount_acquire.btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; @@ -10870,9 +10922,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ref_set_non_owning(env, &regs[BPF_REG_0]); } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove]) - invalidate_non_owning_refs(env); - if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id) regs[BPF_REG_0].id = ++env->id_gen; } else if (btf_type_is_void(t)) { @@ -14713,7 
+14762,7 @@ static bool regs_exact(const struct bpf_reg_state *rold, const struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { - return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_ids(rold->id, rcur->id, idmap) && check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); } @@ -17356,6 +17405,23 @@ static void specialize_kfunc(struct bpf_verifier_env *env, } } +static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, + u16 struct_meta_reg, + u16 node_offset_reg, + struct bpf_insn *insn, + struct bpf_insn *insn_buf, + int *cnt) +{ + struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) }; + + insn_buf[0] = addr[0]; + insn_buf[1] = addr[1]; + insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off); + insn_buf[3] = *insn; + *cnt = 4; +} + static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn *insn_buf, int insn_idx, int *cnt) { @@ -17393,7 +17459,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[2] = addr[1]; insn_buf[3] = *insn; *cnt = 4; - } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] || + desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; @@ -17401,6 +17468,20 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[1] = addr[1]; insn_buf[2] = *insn; *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || + desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || + desc->func_id == 
special_kfunc_list[KF_bpf_rbtree_add_impl]) { + int struct_meta_reg = BPF_REG_3; + int node_offset_reg = BPF_REG_4; + + /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */ + if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + struct_meta_reg = BPF_REG_4; + node_offset_reg = BPF_REG_5; + } + + __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg, + node_offset_reg, insn, insn_buf, cnt); } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3823100b7934..4b20a7269bee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6985,6 +6985,10 @@ struct bpf_rb_node { __u64 :64; } __attribute__((aligned(8))); +struct bpf_refcount { + __u32 :32; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index dbd2c729781a..209811b1993a 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -14,7 +14,8 @@ * type ID of a struct in program BTF. * * The 'local_type_id' parameter must be a known constant. - * The 'meta' parameter is a hidden argument that is ignored. + * The 'meta' parameter is rewritten by the verifier, no need for BPF + * program to set it. * Returns * A pointer to an object of the type corresponding to the passed in * 'local_type_id', or NULL on failure. @@ -28,7 +29,8 @@ extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; * Free an allocated object. All fields of the object that require * destruction will be destructed before the storage is freed. 
* - * The 'meta' parameter is a hidden argument that is ignored. + * The 'meta' parameter is rewritten by the verifier, no need for BPF + * program to set it. * Returns * Void. */ @@ -38,18 +40,50 @@ extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) /* Description - * Add a new entry to the beginning of the BPF linked list. + * Increment the refcount on a refcounted local kptr, turning the + * non-owning reference input into an owning reference in the process. + * + * The 'meta' parameter is rewritten by the verifier, no need for BPF + * program to set it. * Returns - * Void. + * An owning reference to the object pointed to by 'kptr' */ -extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_refcount_acquire_impl */ +#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL) + +/* Description + * Add a new entry to the beginning of the BPF linked list. + * + * The 'meta' and 'off' parameters are rewritten by the verifier, no need + * for BPF programs to set them + * Returns + * 0 if the node was successfully added + * -EINVAL if the node wasn't added because it's already in a list + */ +extern int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; + +/* Convenience macro to wrap over bpf_list_push_front_impl */ +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) /* Description * Add a new entry to the end of the BPF linked list. + * + * The 'meta' and 'off' parameters are rewritten by the verifier, no need + * for BPF programs to set them * Returns - * Void. 
+ * 0 if the node was successfully added + * -EINVAL if the node wasn't added because it's already in a list */ -extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +extern int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; + +/* Convenience macro to wrap over bpf_list_push_back_impl */ +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) /* Description * Remove the entry at the beginning of the BPF linked list. @@ -75,11 +109,19 @@ extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, /* Description * Add 'node' to rbtree with root 'root' using comparator 'less' + * + * The 'meta' and 'off' parameters are rewritten by the verifier, no need + * for BPF programs to set them * Returns - * Nothing + * 0 if the node was successfully added + * -EINVAL if the node wasn't added because it's already in a tree */ -extern void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, - bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) __ksym; +extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, + bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b), + void *meta, __u64 off) __ksym; + +/* Convenience macro to wrap over bpf_rbtree_add_impl */ +#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0) /* Description * Return the first (leftmost) node in input tree diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 0ed8132ce1c3..f63309fd0e28 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -84,11 +84,11 @@ static struct { { "double_push_back", "arg#1 expected pointer to allocated object" }, { "no_node_value_type", "bpf_list_node not found at offset=0" }, { 
"incorrect_value_type", - "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, " + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=40 in struct foo, " "but arg is at offset=0 in struct bar" }, { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, - { "incorrect_node_off1", "bpf_list_node not found at offset=1" }, - { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=41" }, + { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=40 in struct foo" }, { "no_head_type", "bpf_list_head not found at offset=0" }, { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, @@ -266,6 +266,59 @@ static struct btf *init_btf(void) return NULL; } +static void list_and_rb_node_same_struct(bool refcount_field) +{ + int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id; + struct btf *btf; + int id, err; + + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + return; + + bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 24); + if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node")) + return; + + if (refcount_field) { + bpf_refcount_btf_id = btf__add_struct(btf, "bpf_refcount", 4); + if (!ASSERT_GT(bpf_refcount_btf_id, 0, "btf__add_struct bpf_refcount")) + return; + } + + id = btf__add_struct(btf, "bar", refcount_field ? 
44 : 40); + if (!ASSERT_GT(id, 0, "btf__add_struct bar")) + return; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + return; + err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + return; + if (refcount_field) { + err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 320, 0); + if (!ASSERT_OK(err, "btf__add_field bar::ref")) + return; + } + + foo_btf_id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_GT(foo_btf_id, 0, "btf__add_struct foo")) + return; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + return; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + return; + id = btf__add_decl_tag(btf, "contains:bar:a", foo_btf_id, 0); + if (!ASSERT_GT(id, 0, "btf__add_decl_tag contains:bar:a")) + return; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, refcount_field ? 0 : -EINVAL, "check btf"); + btf__free(btf); +} + static void test_btf(void) { struct btf *btf = NULL; @@ -717,39 +770,12 @@ static void test_btf(void) } while (test__start_subtest("btf: list_node and rb_node in same struct")) { - btf = init_btf(); - if (!ASSERT_OK_PTR(btf, "init_btf")) - break; + list_and_rb_node_same_struct(true); + break; + } - id = btf__add_struct(btf, "bpf_rb_node", 24); - if (!ASSERT_EQ(id, 5, "btf__add_struct bpf_rb_node")) - break; - id = btf__add_struct(btf, "bar", 40); - if (!ASSERT_EQ(id, 6, "btf__add_struct bar")) - break; - err = btf__add_field(btf, "a", LIST_NODE, 0, 0); - if (!ASSERT_OK(err, "btf__add_field bar::a")) - break; - err = btf__add_field(btf, "c", 5, 128, 0); - if (!ASSERT_OK(err, "btf__add_field bar::c")) - break; - - id = btf__add_struct(btf, "foo", 20); - if (!ASSERT_EQ(id, 7, "btf__add_struct foo")) - break; - err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); - if (!ASSERT_OK(err, "btf__add_field foo::a")) - break; - err = btf__add_field(btf, 
"b", SPIN_LOCK, 128, 0); - if (!ASSERT_OK(err, "btf__add_field foo::b")) - break; - id = btf__add_decl_tag(btf, "contains:bar:a", 7, 0); - if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:bar:a")) - break; - - err = btf__load_into_kernel(btf); - ASSERT_EQ(err, -EINVAL, "check btf"); - btf__free(btf); + while (test__start_subtest("btf: list_node and rb_node in same struct, no bpf_refcount")) { + list_and_rb_node_same_struct(false); break; } } diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index 156fa95c42f6..e9300c96607d 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -77,6 +77,29 @@ static void test_rbtree_first_and_remove(void) rbtree__destroy(skel); } +static void test_rbtree_api_release_aliasing(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_api_release_aliasing), &opts); + ASSERT_OK(ret, "rbtree_api_release_aliasing"); + ASSERT_OK(opts.retval, "rbtree_api_release_aliasing retval"); + ASSERT_EQ(skel->data->first_data[0], 42, "rbtree_api_release_aliasing first rbtree_remove()"); + ASSERT_EQ(skel->data->first_data[1], -1, "rbtree_api_release_aliasing second rbtree_remove()"); + + rbtree__destroy(skel); +} + void test_rbtree_success(void) { if (test__start_subtest("rbtree_add_nodes")) @@ -85,6 +108,8 @@ void test_rbtree_success(void) test_rbtree_add_and_remove(); if (test__start_subtest("rbtree_first_and_remove")) test_rbtree_first_and_remove(); + if (test__start_subtest("rbtree_api_release_aliasing")) + test_rbtree_api_release_aliasing(); } #define BTF_FAIL_TEST(suffix) \ diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c 
b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c new file mode 100644 index 000000000000..2ab23832062d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include <network_helpers.h> + +#include "refcounted_kptr.skel.h" +#include "refcounted_kptr_fail.skel.h" + +void test_refcounted_kptr(void) +{ + RUN_TESTS(refcounted_kptr); +} + +void test_refcounted_kptr_fail(void) +{ + RUN_TESTS(refcounted_kptr_fail); +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 53ded51a3abb..57440a554304 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -25,7 +25,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l n = bpf_list_pop_front(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); bpf_obj_drop(f); return 3; } @@ -34,7 +34,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l n = bpf_list_pop_back(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); bpf_obj_drop(f); return 4; } @@ -42,7 +42,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l bpf_spin_lock(lock); f->data = 42; - bpf_list_push_front(head, &f->node); + bpf_list_push_front(head, &f->node2); bpf_spin_unlock(lock); if (leave_in_map) return 0; @@ -51,7 +51,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l bpf_spin_unlock(lock); if (!n) return 5; - f = container_of(n, struct foo, node); + f = container_of(n, struct foo, node2); if (f->data != 42) { bpf_obj_drop(f); return 6; @@ -59,14 +59,14 @@ int list_push_pop(struct bpf_spin_lock 
*lock, struct bpf_list_head *head, bool l bpf_spin_lock(lock); f->data = 13; - bpf_list_push_front(head, &f->node); + bpf_list_push_front(head, &f->node2); bpf_spin_unlock(lock); bpf_spin_lock(lock); n = bpf_list_pop_front(head); bpf_spin_unlock(lock); if (!n) return 7; - f = container_of(n, struct foo, node); + f = container_of(n, struct foo, node2); if (f->data != 13) { bpf_obj_drop(f); return 8; @@ -77,7 +77,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l n = bpf_list_pop_front(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); return 9; } @@ -85,7 +85,7 @@ int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool l n = bpf_list_pop_back(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); return 10; } return 0; @@ -119,8 +119,8 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea f[i + 1]->data = i + 1; bpf_spin_lock(lock); - bpf_list_push_front(head, &f[i]->node); - bpf_list_push_front(head, &f[i + 1]->node); + bpf_list_push_front(head, &f[i]->node2); + bpf_list_push_front(head, &f[i + 1]->node2); bpf_spin_unlock(lock); } @@ -130,13 +130,13 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea bpf_spin_unlock(lock); if (!n) return 3; - pf = container_of(n, struct foo, node); + pf = container_of(n, struct foo, node2); if (pf->data != (ARRAY_SIZE(f) - i - 1)) { bpf_obj_drop(pf); return 4; } bpf_spin_lock(lock); - bpf_list_push_back(head, &pf->node); + bpf_list_push_back(head, &pf->node2); bpf_spin_unlock(lock); } @@ -149,7 +149,7 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea bpf_spin_unlock(lock); if (!n) return 5; - pf = container_of(n, struct foo, node); + pf = container_of(n, struct foo, node2); if (pf->data != i) { 
bpf_obj_drop(pf); return 6; @@ -160,7 +160,7 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea n = bpf_list_pop_back(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); return 7; } @@ -168,7 +168,7 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea n = bpf_list_pop_front(head); bpf_spin_unlock(lock); if (n) { - bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(container_of(n, struct foo, node2)); return 8; } return 0; @@ -199,7 +199,7 @@ int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool le bpf_spin_lock(lock); f->data = 42; - bpf_list_push_front(head, &f->node); + bpf_list_push_front(head, &f->node2); bpf_spin_unlock(lock); if (leave_in_map) @@ -210,7 +210,7 @@ int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool le bpf_spin_unlock(lock); if (!n) return 4; - f = container_of(n, struct foo, node); + f = container_of(n, struct foo, node2); if (f->data != 42) { bpf_obj_drop(f); return 5; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h index 3fb2412552fc..c0f3609a7ffa 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.h +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -22,7 +22,7 @@ struct foo { struct map_value { struct bpf_spin_lock lock; int data; - struct bpf_list_head head __contains(foo, node); + struct bpf_list_head head __contains(foo, node2); }; struct array_map { @@ -50,7 +50,7 @@ struct { #define private(name) SEC(".bss." 
#name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; -private(A) struct bpf_list_head ghead __contains(foo, node); +private(A) struct bpf_list_head ghead __contains(foo, node2); private(B) struct bpf_spin_lock glock2; #endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 41978b46f58e..f4c63daba229 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -73,22 +73,21 @@ CHECK(inner_map, pop_back, &iv->head); int test##_missing_lock_##op(void *ctx) \ { \ INIT; \ - void (*p)(void *, void *) = (void *)&bpf_list_##op; \ - p(hexpr, nexpr); \ + bpf_list_##op(hexpr, nexpr); \ return 0; \ } -CHECK(kptr, push_front, &f->head, b); -CHECK(kptr, push_back, &f->head, b); +CHECK(kptr, push_front, &f->head, &b->node); +CHECK(kptr, push_back, &f->head, &b->node); -CHECK(global, push_front, &ghead, f); -CHECK(global, push_back, &ghead, f); +CHECK(global, push_front, &ghead, &f->node2); +CHECK(global, push_back, &ghead, &f->node2); -CHECK(map, push_front, &v->head, f); -CHECK(map, push_back, &v->head, f); +CHECK(map, push_front, &v->head, &f->node2); +CHECK(map, push_back, &v->head, &f->node2); -CHECK(inner_map, push_front, &iv->head, f); -CHECK(inner_map, push_back, &iv->head, f); +CHECK(inner_map, push_front, &iv->head, &f->node2); +CHECK(inner_map, push_back, &iv->head, &f->node2); #undef CHECK @@ -135,32 +134,31 @@ CHECK_OP(pop_back); int test##_incorrect_lock_##op(void *ctx) \ { \ INIT; \ - void (*p)(void *, void*) = (void *)&bpf_list_##op; \ bpf_spin_lock(lexpr); \ - p(hexpr, nexpr); \ + bpf_list_##op(hexpr, nexpr); \ return 0; \ } #define CHECK_OP(op) \ - CHECK(kptr_kptr, op, &f1->lock, &f2->head, b); \ - CHECK(kptr_global, op, &f1->lock, &ghead, f); \ - CHECK(kptr_map, op, &f1->lock, &v->head, f); \ - CHECK(kptr_inner_map, op, &f1->lock, &iv->head, f); \ + CHECK(kptr_kptr, op, &f1->lock, 
&f2->head, &b->node); \ + CHECK(kptr_global, op, &f1->lock, &ghead, &f->node2); \ + CHECK(kptr_map, op, &f1->lock, &v->head, &f->node2); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head, &f->node2); \ \ - CHECK(global_global, op, &glock2, &ghead, f); \ - CHECK(global_kptr, op, &glock, &f1->head, b); \ - CHECK(global_map, op, &glock, &v->head, f); \ - CHECK(global_inner_map, op, &glock, &iv->head, f); \ + CHECK(global_global, op, &glock2, &ghead, &f->node2); \ + CHECK(global_kptr, op, &glock, &f1->head, &b->node); \ + CHECK(global_map, op, &glock, &v->head, &f->node2); \ + CHECK(global_inner_map, op, &glock, &iv->head, &f->node2); \ \ - CHECK(map_map, op, &v->lock, &v2->head, f); \ - CHECK(map_kptr, op, &v->lock, &f2->head, b); \ - CHECK(map_global, op, &v->lock, &ghead, f); \ - CHECK(map_inner_map, op, &v->lock, &iv->head, f); \ + CHECK(map_map, op, &v->lock, &v2->head, &f->node2); \ + CHECK(map_kptr, op, &v->lock, &f2->head, &b->node); \ + CHECK(map_global, op, &v->lock, &ghead, &f->node2); \ + CHECK(map_inner_map, op, &v->lock, &iv->head, &f->node2); \ \ - CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, f); \ - CHECK(inner_map_kptr, op, &iv->lock, &f2->head, b); \ - CHECK(inner_map_global, op, &iv->lock, &ghead, f); \ - CHECK(inner_map_map, op, &iv->lock, &v->head, f); + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, &f->node2);\ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head, &b->node); \ + CHECK(inner_map_global, op, &iv->lock, &ghead, &f->node2); \ + CHECK(inner_map_map, op, &iv->lock, &v->head, &f->node2); CHECK_OP(push_front); CHECK_OP(push_back); @@ -340,7 +338,7 @@ int direct_read_node(void *ctx) f = bpf_obj_new(typeof(*f)); if (!f) return 0; - return *(int *)&f->node; + return *(int *)&f->node2; } SEC("?tc") @@ -351,12 +349,12 @@ int direct_write_node(void *ctx) f = bpf_obj_new(typeof(*f)); if (!f) return 0; - *(int *)&f->node = 0; + *(int *)&f->node2 = 0; return 0; } static __always_inline -int use_after_unlock(void (*op)(void *head, 
void *node)) +int use_after_unlock(bool push_front) { struct foo *f; @@ -365,7 +363,10 @@ int use_after_unlock(void (*op)(void *head, void *node)) return 0; bpf_spin_lock(&glock); f->data = 42; - op(&ghead, &f->node); + if (push_front) + bpf_list_push_front(&ghead, &f->node2); + else + bpf_list_push_back(&ghead, &f->node2); bpf_spin_unlock(&glock); return f->data; @@ -374,17 +375,17 @@ int use_after_unlock(void (*op)(void *head, void *node)) SEC("?tc") int use_after_unlock_push_front(void *ctx) { - return use_after_unlock((void *)bpf_list_push_front); + return use_after_unlock(true); } SEC("?tc") int use_after_unlock_push_back(void *ctx) { - return use_after_unlock((void *)bpf_list_push_back); + return use_after_unlock(false); } static __always_inline -int list_double_add(void (*op)(void *head, void *node)) +int list_double_add(bool push_front) { struct foo *f; @@ -392,8 +393,13 @@ int list_double_add(void (*op)(void *head, void *node)) if (!f) return 0; bpf_spin_lock(&glock); - op(&ghead, &f->node); - op(&ghead, &f->node); + if (push_front) { + bpf_list_push_front(&ghead, &f->node2); + bpf_list_push_front(&ghead, &f->node2); + } else { + bpf_list_push_back(&ghead, &f->node2); + bpf_list_push_back(&ghead, &f->node2); + } bpf_spin_unlock(&glock); return 0; @@ -402,13 +408,13 @@ int list_double_add(void (*op)(void *head, void *node)) SEC("?tc") int double_push_front(void *ctx) { - return list_double_add((void *)bpf_list_push_front); + return list_double_add(true); } SEC("?tc") int double_push_back(void *ctx) { - return list_double_add((void *)bpf_list_push_back); + return list_double_add(false); } SEC("?tc") @@ -450,7 +456,7 @@ int incorrect_node_var_off(struct __sk_buff *ctx) if (!f) return 0; bpf_spin_lock(&glock); - bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol); + bpf_list_push_front(&ghead, (void *)&f->node2 + ctx->protocol); bpf_spin_unlock(&glock); return 0; @@ -465,7 +471,7 @@ int incorrect_node_off1(void *ctx) if (!f) return 0; 
bpf_spin_lock(&glock); - bpf_list_push_front(&ghead, (void *)&f->node + 1); + bpf_list_push_front(&ghead, (void *)&f->node2 + 1); bpf_spin_unlock(&glock); return 0; @@ -480,7 +486,7 @@ int incorrect_node_off2(void *ctx) if (!f) return 0; bpf_spin_lock(&glock); - bpf_list_push_front(&ghead, &f->node2); + bpf_list_push_front(&ghead, &f->node); bpf_spin_unlock(&glock); return 0; @@ -510,7 +516,7 @@ int incorrect_head_var_off1(struct __sk_buff *ctx) if (!f) return 0; bpf_spin_lock(&glock); - bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node); + bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node2); bpf_spin_unlock(&glock); return 0; @@ -525,7 +531,7 @@ int incorrect_head_var_off2(struct __sk_buff *ctx) if (!f) return 0; bpf_spin_lock(&glock); - bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node); + bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node2); bpf_spin_unlock(&glock); return 0; @@ -563,7 +569,7 @@ int incorrect_head_off2(void *ctx) return 0; bpf_spin_lock(&glock); - bpf_list_push_front((void *)&ghead + 1, &f->node); + bpf_list_push_front((void *)&ghead + 1, &f->node2); bpf_spin_unlock(&glock); return 0; diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index 4c90aa6abddd..b09f4fffe57c 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -93,9 +93,11 @@ long rbtree_add_and_remove(void *ctx) res = bpf_rbtree_remove(&groot, &n->node); bpf_spin_unlock(&glock); + if (!res) + return 1; + n = container_of(res, struct node_data, node); removed_key = n->key; - bpf_obj_drop(n); return 0; @@ -148,9 +150,11 @@ long rbtree_first_and_remove(void *ctx) res = bpf_rbtree_remove(&groot, &o->node); bpf_spin_unlock(&glock); + if (!res) + return 5; + o = container_of(res, struct node_data, node); removed_key = o->key; - bpf_obj_drop(o); bpf_spin_lock(&glock); @@ -173,4 +177,70 @@ long rbtree_first_and_remove(void *ctx) return 1; } 
+SEC("tc") +long rbtree_api_release_aliasing(void *ctx) +{ /* Removes one node twice through two aliasing non-owning refs; the second remove is expected to return NULL. */ + struct node_data *n, *m, *o; + struct bpf_rb_node *res, *res2; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + n->key = 41; + n->data = 42; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + + bpf_spin_lock(&glock); + + /* m and o point to the same node, + * but verifier doesn't know this + */ + res = bpf_rbtree_first(&groot); + if (!res) + goto err_out; + o = container_of(res, struct node_data, node); + + res = bpf_rbtree_first(&groot); + if (!res) + goto err_out; + m = container_of(res, struct node_data, node); + + res = bpf_rbtree_remove(&groot, &m->node); + /* Retval of previous remove returns an owning reference to m, + * which is the same node non-owning ref o is pointing at. + * We can safely try to remove o as the second rbtree_remove will + * return NULL since the node isn't in a tree. + * + * Previously we relied on the verifier type system + rbtree_remove + * invalidating non-owning refs to ensure that rbtree_remove couldn't + * fail, but now rbtree_remove does runtime checking so we no longer + * invalidate non-owning refs after remove. 
+ */ + res2 = bpf_rbtree_remove(&groot, &o->node); + + bpf_spin_unlock(&glock); + + if (res) { + o = container_of(res, struct node_data, node); + first_data[0] = o->data; + bpf_obj_drop(o); + } + if (res2) { + /* The second remove fails, so res2 is null and this doesn't + * execute + */ + m = container_of(res2, struct node_data, node); + first_data[1] = m->data; + bpf_obj_drop(m); + } + return 0; + +err_out: /* both goto sites jump here with glock still held */ + bpf_spin_unlock(&glock); + return 1; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 46d7d18a218f..3fecf1c6dfe5 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=2 alloc_insn=10") +__failure __msg("Unreleased reference id=3 alloc_insn=10") long rbtree_api_remove_no_drop(void *ctx) { struct bpf_rb_node *res; @@ -118,11 +118,13 @@ long rbtree_api_remove_no_drop(void *ctx) res = bpf_rbtree_remove(&groot, res); - n = container_of(res, struct node_data, node); - __sink(n); + if (res) { + n = container_of(res, struct node_data, node); + __sink(n); + } bpf_spin_unlock(&glock); - /* bpf_obj_drop(n) is missing here */ + /* if (res) { bpf_obj_drop(n); } is missing here */ return 0; unlock_err: @@ -149,6 +151,32 @@ long rbtree_api_add_to_multiple_trees(void *ctx) return 0; } +SEC("?tc") +__failure __msg("dereference of modified ptr_or_null_ ptr R2 off=16 disallowed") +long rbtree_api_use_unchecked_remove_retval(void *ctx) +{ + struct bpf_rb_node *res; + + bpf_spin_lock(&glock); + + res = bpf_rbtree_first(&groot); + if (!res) + goto err_out; + res = bpf_rbtree_remove(&groot, res); + + bpf_spin_unlock(&glock); + + bpf_spin_lock(&glock); + /* Must check res for NULL before using in rbtree_add below */ + bpf_rbtree_add(&groot, res, less); + 
bpf_spin_unlock(&glock); + return 0; + 
+err_out: + bpf_spin_unlock(&glock); + return 1; +} + SEC("?tc") __failure __msg("rbtree_remove node input must be non-owning ref") long rbtree_api_add_release_unlock_escape(void *ctx) @@ -173,57 +201,6 @@ long rbtree_api_add_release_unlock_escape(void *ctx) return 0; } -SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") -long rbtree_api_release_aliasing(void *ctx) -{ - struct node_data *n, *m, *o; - struct bpf_rb_node *res; - - n = bpf_obj_new(typeof(*n)); - if (!n) - return 1; - - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_spin_unlock(&glock); - - bpf_spin_lock(&glock); - - /* m and o point to the same node, - * but verifier doesn't know this - */ - res = bpf_rbtree_first(&groot); - if (!res) - return 1; - o = container_of(res, struct node_data, node); - - res = bpf_rbtree_first(&groot); - if (!res) - return 1; - m = container_of(res, struct node_data, node); - - bpf_rbtree_remove(&groot, &m->node); - /* This second remove shouldn't be possible. Retval of previous - * remove returns owning reference to m, which is the same - * node o's non-owning ref is pointing at - * - * In order to preserve property - * * owning ref must not be in rbtree - * * non-owning ref must be in rbtree - * - * o's ref must be invalidated after previous remove. Otherwise - * we'd have non-owning ref to node that isn't in rbtree, and - * verifier wouldn't be able to use type system to prevent remove - * of ref that already isn't in any tree. Would have to do runtime - * checks in that case. 
- */ - bpf_rbtree_remove(&groot, &o->node); - - bpf_spin_unlock(&glock); - return 0; -} - SEC("?tc") __failure __msg("rbtree_remove node input must be non-owning ref") long rbtree_api_first_release_unlock_escape(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c new file mode 100644 index 000000000000..1d348a225140 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" +/* Refcounted node that the tests link into an rbtree through r and into a list through l. */ +struct node_data { + long key; + long list_data; + struct bpf_rb_node r; + struct bpf_list_node l; + struct bpf_refcount ref; +}; +/* Map value holding one kptr slot used to stash a node_data. */ +struct map_value { + struct node_data __kptr *node; +}; +/* Single-entry array map whose value carries the stashed node. */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} stashed_nodes SEC(".maps"); +/* Refcounted node with a single rbtree hook, stored in aroot. */ +struct node_acquire { + long key; + long data; + struct bpf_rb_node node; + struct bpf_refcount refcount; +}; + +#define private(name) SEC(".bss." 
#name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock lock; +private(A) struct bpf_rb_root root __contains(node_data, r); +private(A) struct bpf_list_head head __contains(node_data, l); + +private(B) struct bpf_spin_lock alock; +private(B) struct bpf_rb_root aroot __contains(node_acquire, node); +/* rbtree ordering callback for node_data: true when node_a's key is below node_b's. */ +static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b) +{ + struct node_data *a; + struct node_data *b; + + a = container_of(node_a, struct node_data, r); + b = container_of(node_b, struct node_data, r); + + return a->key < b->key; +} +/* Same ordering by key, for node_acquire entries. */ +static bool less_a(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_acquire *node_a; + struct node_acquire *node_b; + + node_a = container_of(a, struct node_acquire, node); + node_b = container_of(b, struct node_acquire, node); + + return node_a->key < node_b->key; +} +/* Allocate one node (key 123, list_data 456), add it to root via n and push it onto head via m; returns 0, or -1/-2/-3 on failure. */ +static long __insert_in_tree_and_list(struct bpf_list_head *head, + struct bpf_rb_root *root, + struct bpf_spin_lock *lock) +{ + struct node_data *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return -1; + + m = bpf_refcount_acquire(n); /* second handle on the same object */ + m->key = 123; + m->list_data = 456; + + bpf_spin_lock(lock); + if (bpf_rbtree_add(root, &n->r, less)) { + /* Failure to insert - unexpected */ + bpf_spin_unlock(lock); + bpf_obj_drop(m); + return -2; + } + bpf_spin_unlock(lock); + + bpf_spin_lock(lock); + if (bpf_list_push_front(head, &m->l)) { + /* Failure to insert - unexpected */ + bpf_spin_unlock(lock); + return -3; + } + bpf_spin_unlock(lock); + return 0; +} +/* Allocate a node keyed by val, exchange one handle into stashed_nodes[idx] and add the other to root; returns 0, or -1..-4 on failure. */ +static long __stash_map_insert_tree(int idx, int val, struct bpf_rb_root *root, + struct bpf_spin_lock *lock) +{ + struct map_value *mapval; + struct node_data *n, *m; + + mapval = bpf_map_lookup_elem(&stashed_nodes, &idx); + if (!mapval) + return -1; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return -2; + + n->key = val; + m = bpf_refcount_acquire(n); + + n = bpf_kptr_xchg(&mapval->node, n); /* n now holds whatever was stashed before */ + if (n) { + bpf_obj_drop(n); + bpf_obj_drop(m); + return 
-3; + } + + bpf_spin_lock(lock); + if (bpf_rbtree_add(root, &m->r, less)) { + /* Failure to insert - unexpected */ + bpf_spin_unlock(lock); + return -4; + } + bpf_spin_unlock(lock); + return 0; +} +/* Return the key of root's first node, removing and dropping that node when remove_from_tree is set; -1 if the tree is empty, -2 if the remove returns NULL. */ +static long __read_from_tree(struct bpf_rb_root *root, + struct bpf_spin_lock *lock, + bool remove_from_tree) +{ + struct bpf_rb_node *rb; + struct node_data *n; + long res = -99; + + bpf_spin_lock(lock); + + rb = bpf_rbtree_first(root); + if (!rb) { + bpf_spin_unlock(lock); + return -1; + } + + n = container_of(rb, struct node_data, r); + res = n->key; + + if (!remove_from_tree) { + bpf_spin_unlock(lock); + return res; + } + + rb = bpf_rbtree_remove(root, rb); + bpf_spin_unlock(lock); + if (!rb) + return -2; + n = container_of(rb, struct node_data, r); + bpf_obj_drop(n); + return res; +} +/* Pop head's front node and return its list_data; the node is dropped when remove_from_list is set, otherwise pushed back. -1 if the list is empty, -2 if the push back fails. */ +static long __read_from_list(struct bpf_list_head *head, + struct bpf_spin_lock *lock, + bool remove_from_list) +{ + struct bpf_list_node *l; + struct node_data *n; + long res = -99; + + bpf_spin_lock(lock); + + l = bpf_list_pop_front(head); + if (!l) { + bpf_spin_unlock(lock); + return -1; + } + + n = container_of(l, struct node_data, l); + res = n->list_data; + + if (!remove_from_list) { + if (bpf_list_push_back(head, &n->l)) { + bpf_spin_unlock(lock); + return -2; + } + } + + bpf_spin_unlock(lock); + + if (remove_from_list) + bpf_obj_drop(n); + return res; +} +/* Exchange NULL into stashed_nodes[idx], return the key of the node taken out and drop it; -1 if the lookup fails, -2 if the slot was empty. */ +static long __read_from_unstash(int idx) +{ + struct node_data *n = NULL; + struct map_value *mapval; + long val = -99; + + mapval = bpf_map_lookup_elem(&stashed_nodes, &idx); + if (!mapval) + return -1; + + n = bpf_kptr_xchg(&mapval->node, n); + if (!n) + return -2; + + val = n->key; + bpf_obj_drop(n); + return val; +} +/* Expected retval 579 == 123 + 456, the key and list_data written by __insert_in_tree_and_list(). */ +#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \ +SEC("tc") \ +__description(desc) \ +__success __retval(579) \ +long insert_and_remove_tree_##rem_tree##_list_##rem_list(void *ctx) \ +{ \ + long err, tree_data, list_data; \ + \ + err = __insert_in_tree_and_list(&head, &root, &lock); \ + if (err) \ 
+ return err; \ + \ + err = __read_from_tree(&root, &lock, rem_tree); \ + if (err < 0) \ + return err; \ + else \ + tree_data = err; \ + \ + err = __read_from_list(&head, &lock, rem_list); \ + if (err < 0) \ + return err; \ + else \ + list_data = err; \ + \ + return tree_data + list_data; \ +} + +/* After successful insert of struct node_data into both collections: + * - it should have refcount = 2 + * - removing / not removing the node_data from a collection after + * reading should have no effect on ability to read / remove from + * the other collection + */ +INSERT_READ_BOTH(true, true, "insert_read_both: remove from tree + list"); +INSERT_READ_BOTH(false, false, "insert_read_both: remove from neither"); +INSERT_READ_BOTH(true, false, "insert_read_both: remove from tree"); +INSERT_READ_BOTH(false, true, "insert_read_both: remove from list"); +/* Second generator: same checks, but the list is read before the tree. */ +#undef INSERT_READ_BOTH +#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \ +SEC("tc") \ +__description(desc) \ +__success __retval(579) /* tree_data + list_data */ \ +long insert_and_remove_lf_tree_##rem_tree##_list_##rem_list(void *ctx) \ +{ \ + long err, tree_data, list_data; \ + \ + err = __insert_in_tree_and_list(&head, &root, &lock); \ + if (err) \ + return err; \ + \ + err = __read_from_list(&head, &lock, rem_list); \ + if (err < 0) \ + return err; \ + else \ + list_data = err; \ + \ + err = __read_from_tree(&root, &lock, rem_tree); \ + if (err < 0) \ + return err; \ + else \ + tree_data = err; \ + \ + return tree_data + list_data; \ +} + +/* Similar to insert_read_both, but list data is read and possibly removed + * first + * + * Results should be no different than reading and possibly removing rbtree + * node first + */ +INSERT_READ_BOTH(true, true, "insert_read_both_list_first: remove from tree + list"); +INSERT_READ_BOTH(false, false, "insert_read_both_list_first: remove from neither"); +INSERT_READ_BOTH(true, false, "insert_read_both_list_first: remove from tree"); +INSERT_READ_BOTH(false, true, "insert_read_both_list_first: remove 
from list"); +/* Expected retval is -1: the second read_fn() call returns -1 because the first call already removed the only node. */ +#define INSERT_DOUBLE_READ_AND_DEL(read_fn, read_root, desc) \ +SEC("tc") \ +__description(desc) \ +__success __retval(-1) \ +long insert_double_##read_fn##_and_del_##read_root(void *ctx) \ +{ \ + long err, list_data; \ + \ + err = __insert_in_tree_and_list(&head, &root, &lock); \ + if (err) \ + return err; \ + \ + err = read_fn(&read_root, &lock, true); \ + if (err < 0) \ + return err; \ + else \ + list_data = err; \ + \ + err = read_fn(&read_root, &lock, true); \ + if (err < 0) \ + return err; \ + \ + return err + list_data; \ +} + +/* Insert into both tree and list, then try reading-and-removing from either twice + * + * The second read-and-remove should fail on read step since the node has + * already been removed + */ +INSERT_DOUBLE_READ_AND_DEL(__read_from_tree, root, "insert_double_del: 2x read-and-del from tree"); +INSERT_DOUBLE_READ_AND_DEL(__read_from_list, head, "insert_double_del: 2x read-and-del from list"); +/* Expected retval 84 == 42 + 42: the key given to __stash_map_insert_tree(), read once from the tree and once from the map. */ +#define INSERT_STASH_READ(rem_tree, desc) \ +SEC("tc") \ +__description(desc) \ +__success __retval(84) \ +long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \ +{ \ + long err, tree_data, map_data; \ + \ + err = __stash_map_insert_tree(0, 42, &root, &lock); \ + if (err) \ + return err; \ + \ + err = __read_from_tree(&root, &lock, rem_tree); \ + if (err < 0) \ + return err; \ + else \ + tree_data = err; \ + \ + err = __read_from_unstash(0); \ + if (err < 0) \ + return err; \ + else \ + map_data = err; \ + \ + return tree_data + map_data; \ +} + +/* Stash a refcounted node in map_val, insert same node into tree, then try + * reading data from tree then unstashed map_val, possibly removing from tree + * + * Removing from tree should have no effect on map_val kptr validity + */ +INSERT_STASH_READ(true, "insert_stash_read: remove from tree"); +INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree"); +/* Acquire a second handle while the node is already in aroot, then write through it and drop it after unlocking. */ +SEC("tc") +__success +long rbtree_refcounted_node_ref_escapes(void *ctx) +{ + struct node_acquire *n, *m; + 
n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&alock); + bpf_rbtree_add(&aroot, &n->node, less_a); + m = bpf_refcount_acquire(n); + bpf_spin_unlock(&alock); + + m->key = 2; + bpf_obj_drop(m); + return 0; +} +/* Acquire the second handle from n before n is added to aroot, then drop it after the add. */ +SEC("tc") +__success +long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + m = bpf_refcount_acquire(n); + m->key = 2; + + bpf_spin_lock(&alock); + bpf_rbtree_add(&aroot, &n->node, less_a); + bpf_spin_unlock(&alock); + + bpf_obj_drop(m); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c new file mode 100644 index 000000000000..efcb308f80ad --- /dev/null +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" +/* Refcounted node with a single rbtree hook, stored in groot. */ +struct node_acquire { + long key; + long data; + struct bpf_rb_node node; + struct bpf_refcount refcount; +}; + +#define private(name) SEC(".data." 
#name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_rb_root groot __contains(node_acquire, node); +/* rbtree ordering callback: true when a's key is below b's. */ +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_acquire *node_a; + struct node_acquire *node_b; + + node_a = container_of(a, struct node_acquire, node); + node_b = container_of(b, struct node_acquire, node); + + return node_a->key < node_b->key; +} +/* Expected to be rejected: the handle acquired after the node is added to groot is never released. */ +SEC("?tc") +__failure __msg("Unreleased reference id=3 alloc_insn=21") +long rbtree_refcounted_node_ref_escapes(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + /* m becomes an owning ref but is never drop'd or added to a tree */ + m = bpf_refcount_acquire(n); + bpf_spin_unlock(&glock); + + m->key = 2; + return 0; +} +/* Expected to be rejected: the handle acquired before the node is added to groot is never released. */ +SEC("?tc") +__failure __msg("Unreleased reference id=3 alloc_insn=9") +long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + + /* m becomes an owning ref but is never drop'd or added to a tree */ + m = bpf_refcount_acquire(n); + m->key = 2; + + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_spin_unlock(&glock); + + return 0; +} + +char _license[] SEC("license") = "GPL";