Merge branch 'bpf-introduce-file-dynptr'

Mykyta Yatsenko says:

====================
bpf: Introduce file dynptr

From: Mykyta Yatsenko <yatsenko@meta.com>

This series adds a new dynptr kind, file dynptr, which enables BPF
programs to perform safe reads from files in a structured way.
Initial motivations include:
 * Parsing the executable’s ELF to locate thread-local variable symbols
 * Capturing stack traces when frame pointers are disabled

By leveraging the existing dynptr abstraction, we reuse the verifier’s
lifetime/size checks and keep the API consistent with existing dynptr
read helpers.

Technical details:
1. Reuses the existing freader library to read files a folio at a time.
2. bpf_dynptr_slice() and bpf_dynptr_read() always copy data from folios
into a program-provided buffer; zero-copy access is intentionally not
supported to keep it simple.
3. Reads may sleep if the requested folios are not in the page cache.
4. A few verifier changes were required:
  * Support dynptr destruction in kfuncs
  * Add kfunc address substitution based on whether the program runs in
  a sleepable or non-sleepable context.

Testing:
The final patch adds a selftest that validates that a BPF program reads the
same data as userspace, and that page faults are enabled in sleepable
context and disabled in non-sleepable context.

Changelog:
---
v4 -> v5
v4: https://lore.kernel.org/all/20251021200334.220542-1-mykyta.yatsenko5@gmail.com/
 * Inlined and removed kfunc_call_imm(), run overflow check for call_imm
 only if !bpf_jit_supports_far_kfunc_call().

v3 -> v4
v3: https://lore.kernel.org/bpf/20251020222538.932915-1-mykyta.yatsenko5@gmail.com/
 * Remove ringbuf usage from selftests
 * bpf_dynptr_set_null(ptr) when discarding file dynptr
 * call kfunc_call_imm() in specialize_kfunc() only, removed
 call from add_kfunc_call()

v2 -> v3
v2: https://lore.kernel.org/bpf/20251015161155.120148-1-mykyta.yatsenko5@gmail.com/
 * Add negative tests
 * Rewrote tests to use LSM for bpf_get_task_exe_file()
 * Move call_imm overflow check into kfunc_call_imm()

v1 -> v2
v1: https://lore.kernel.org/bpf/20251003160416.585080-1-mykyta.yatsenko5@gmail.com/
 * Remove ELF parsing selftest
 * Expanded u32 -> u64 refactoring, changes in include/uapi/linux/bpf.h
 * Removed freader.{c,h}, instead move freader definitions into
 buildid.h.
 * Small refactoring of the multiple folios reading algorithm
 * Directly return error after unmark_stack_slots_dynptr().
 * Make kfuncs receive trusted arguments.
 * Remove enum bpf_is_sleepable, use bool instead
 * Remove unnecessary sorting from specialize_kfunc()
 * Remove bool kfunc_in_sleepable_ctx; field from the struct
 bpf_insn_aux_data, rely on non_sleepable field introduced by Kumar
 * Refactor selftests, do madvise(...MADV_PAGEOUT) for all pages read by
 the test
 * Introduce the test for non-sleepable case, verify it fails with -EFAULT
====================

Link: https://lore.kernel.org/r/20251026203853.135105-1-mykyta.yatsenko5@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov
2025-10-27 09:56:28 -07:00
17 changed files with 648 additions and 194 deletions

View File

@@ -4648,6 +4648,7 @@ F: Documentation/userspace-api/ebpf/
F: arch/*/net/*
F: include/linux/bpf*
F: include/linux/btf*
F: include/linux/buildid.h
F: include/linux/filter.h
F: include/trace/events/xdp.h
F: include/uapi/linux/bpf*

View File

@@ -670,6 +670,9 @@ static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
struct bpf_dynptr *ptr__uninit);
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
@@ -792,12 +795,15 @@ enum bpf_type_flag {
/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to file */
DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
| DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
| DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1385,21 +1391,23 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_XDP,
/* Points to skb_metadata_end()-skb_metadata_len() */
BPF_DYNPTR_TYPE_SKB_META,
/* Underlying data is a file */
BPF_DYNPTR_TYPE_FILE,
};
int bpf_dynptr_check_size(u32 size);
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
int bpf_dynptr_check_size(u64 size);
u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len);
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
void *src, u32 len, u64 flags);
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
void *buffer__opt, u32 buffer__szk);
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
void *src, u64 len, u64 flags);
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
void *buffer__opt, u64 buffer__szk);
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
{
u32 size = __bpf_dynptr_size(ptr);
u64 size = __bpf_dynptr_size(ptr);
if (len > size || offset > size - len)
return -E2BIG;

View File

@@ -18,4 +18,29 @@ void init_vmlinux_build_id(void);
static inline void init_vmlinux_build_id(void) { }
#endif
/*
 * State of a reader created by either freader_init_from_file() or
 * freader_init_from_mem(). The anonymous union holds the per-source state:
 * one arm for a file-backed reader, one for a memory-backed reader.
 */
struct freader {
/* caller-supplied buffer and its size, as passed to freader_init_from_file() */
void *buf;
u32 buf_sz;
/* error code; callers read it when freader_fetch() returns NULL */
int err;
union {
/* file-backed reader, see freader_init_from_file() */
struct {
struct file *file;
/*
 * NOTE(review): folio/addr/folio_off presumably describe the folio
 * currently held by the reader, its mapped address and its offset in
 * the file -- confirm against the freader implementation.
 */
struct folio *folio;
void *addr;
loff_t folio_off;
/* value of the may_fault argument given to freader_init_from_file() */
bool may_fault;
};
/* memory-backed reader, see freader_init_from_mem() */
struct {
const char *data;
u64 data_sz;
};
};
};
/*
 * freader API.
 *
 * freader_init_from_file()/freader_init_from_mem() set up a reader over a
 * file or over a memory region respectively. freader_fetch() returns a
 * pointer to sz bytes starting at file_off, or NULL on failure, in which case
 * the error code is available in r->err. The returned pointer is not
 * necessarily r->buf, so callers that need the bytes in their own buffer must
 * copy them. freader_cleanup() is called once the reader is no longer needed.
 */
void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
struct file *file, bool may_fault);
void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz);
const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz);
void freader_cleanup(struct freader *r);
#endif

View File

@@ -5618,7 +5618,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
* long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
* long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5661,7 +5661,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
* long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5671,7 +5671,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5692,7 +5692,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*

View File

@@ -28,6 +28,7 @@
#include <linux/verification.h>
#include <linux/task_work.h>
#include <linux/irq_work.h>
#include <linux/buildid.h>
#include "../../lib/kstrtox.h"
@@ -1656,6 +1657,13 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
.arg2_btf_id = BPF_PTR_POISON,
};
/*
 * Per-dynptr state of a BPF_DYNPTR_TYPE_FILE dynptr. It is allocated by
 * make_file_dynptr(), stored as the dynptr's data pointer and freed by
 * bpf_dynptr_file_discard().
 */
struct bpf_dynptr_file_impl {
/* reader used to fetch bytes from the underlying file */
struct freader freader;
/* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */
u64 offset;
u64 size;
};
/* Since the upper 8 bits of dynptr->size are reserved, the
 * maximum supported size is 2^24 - 1.
 */
@@ -1684,23 +1692,65 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
{
if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
struct bpf_dynptr_file_impl *df = ptr->data;
return df->size;
}
return ptr->size & DYNPTR_SIZE_MASK;
}
static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
/* Move the start of the dynptr forward by @off bytes.
 *
 * File dynptrs keep a 64-bit offset in struct bpf_dynptr_file_impl, reached
 * through ptr->data; every other dynptr type uses ptr->offset directly.
 */
static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off)
{
	struct bpf_dynptr_file_impl *file_state;

	if (bpf_dynptr_get_type(ptr) != BPF_DYNPTR_TYPE_FILE) {
		ptr->offset += off;
		return;
	}

	file_state = ptr->data;
	file_state->offset += off;
}
static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size)
{
u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
ptr->size = new_size | metadata;
if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
struct bpf_dynptr_file_impl *df = ptr->data;
df->size = new_size;
return;
}
ptr->size = (u32)new_size | metadata;
}
int bpf_dynptr_check_size(u32 size)
int bpf_dynptr_check_size(u64 size)
{
return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
}
/* Copy @len bytes of the file behind @df into @buf, starting @offset bytes
 * past the dynptr's own offset.
 *
 * Returns 0 on success, -EINVAL when @buf is NULL, or the error code recorded
 * in the reader when the fetch fails.
 */
static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len)
{
	struct freader *reader = &df->freader;
	const void *fetched;

	if (!buf)
		return -EINVAL;

	/* Point the reader at the caller's buffer for this fetch */
	reader->buf = buf;
	reader->buf_sz = len;

	fetched = freader_fetch(reader, offset + df->offset, len);
	if (!fetched)
		return reader->err;

	/* The reader may return a pointer other than @buf; the data must
	 * always end up in @buf, so copy it over in that case.
	 */
	if (fetched != buf)
		memcpy(buf, fetched, len);

	return 0;
}
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size)
{
@@ -1715,7 +1765,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
memset(ptr, 0, sizeof(*ptr));
}
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u64, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
int err;
@@ -1750,8 +1800,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
};
static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src,
u32 offset, u64 flags)
static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src,
u64 offset, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1781,14 +1831,16 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
case BPF_DYNPTR_TYPE_SKB_META:
memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
return 0;
case BPF_DYNPTR_TYPE_FILE:
return bpf_file_fetch_bytes(src->data, offset, dst, len);
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
}
}
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct bpf_dynptr_kern *, src,
u64, offset, u64, flags)
{
return __bpf_dynptr_read(dst, len, src, offset, flags);
}
@@ -1804,8 +1856,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.arg5_type = ARG_ANYTHING,
};
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
u32 len, u64 flags)
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src,
u64 len, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1848,8 +1900,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
}
}
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u64, offset, void *, src,
u64, len, u64, flags)
{
return __bpf_dynptr_write(dst, offset, src, len, flags);
}
@@ -1865,7 +1917,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
.arg5_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u64, offset, u64, len)
{
enum bpf_dynptr_type type;
int err;
@@ -2680,12 +2732,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
* provided buffer, with its contents containing the data, if unable to obtain
* direct pointer)
*/
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
void *buffer__opt, u32 buffer__szk)
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
void *buffer__opt, u64 buffer__szk)
{
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
enum bpf_dynptr_type type;
u32 len = buffer__szk;
u64 len = buffer__szk;
int err;
if (!ptr->data)
@@ -2719,6 +2771,9 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
}
case BPF_DYNPTR_TYPE_SKB_META:
return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
case BPF_DYNPTR_TYPE_FILE:
err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk);
return err ? NULL : buffer__opt;
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return NULL;
@@ -2767,8 +2822,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
* provided buffer, with its contents containing the data, if unable to obtain
* direct pointer)
*/
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
void *buffer__opt, u32 buffer__szk)
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
void *buffer__opt, u64 buffer__szk)
{
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
@@ -2800,10 +2855,10 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
}
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end)
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end)
{
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
u32 size;
u64 size;
if (!ptr->data || start > end)
return -EINVAL;
@@ -2813,7 +2868,7 @@ __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end
if (start > size || end > size)
return -ERANGE;
ptr->offset += start;
bpf_dynptr_advance_offset(ptr, start);
bpf_dynptr_set_size(ptr, end - start);
return 0;
@@ -2836,7 +2891,7 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
return __bpf_dynptr_is_rdonly(ptr);
}
__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p)
__bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p)
{
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
@@ -2873,14 +2928,14 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
* Copies data from source dynptr to destination dynptr.
* Returns 0 on success; negative error, otherwise.
*/
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off,
struct bpf_dynptr *src_ptr, u64 src_off, u64 size)
{
struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
void *src_slice, *dst_slice;
char buf[256];
u32 off;
u64 off;
src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);
@@ -2902,7 +2957,7 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
off = 0;
while (off < size) {
u32 chunk_sz = min_t(u32, sizeof(buf), size - off);
u64 chunk_sz = min_t(u64, sizeof(buf), size - off);
int err;
err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
@@ -2928,10 +2983,10 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
* at @offset with the constant byte @val.
* Returns 0 on success; negative error, otherwise.
*/
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val)
{
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val)
{
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
u32 chunk_sz, write_off;
u64 chunk_sz, write_off;
char buf[256];
void* slice;
int err;
@@ -2950,11 +3005,11 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
return err;
/* Non-linear data under the dynptr, write from a local buffer */
chunk_sz = min_t(u32, sizeof(buf), size);
chunk_sz = min_t(u64, sizeof(buf), size);
memset(buf, val, chunk_sz);
for (write_off = 0; write_off < size; write_off += chunk_sz) {
chunk_sz = min_t(u32, sizeof(buf), size - write_off);
chunk_sz = min_t(u64, sizeof(buf), size - write_off);
err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0);
if (err)
return err;
@@ -4252,6 +4307,54 @@ __bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct b
return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
}
/* Initialize @ptr as a read-only file dynptr backed by @file.
 *
 * @may_sleep is forwarded to the file reader as its may_fault setting.
 * On any failure @ptr is set to a null dynptr before the error is returned:
 * -EINVAL for non-zero @flags, -ENOMEM when the state cannot be allocated.
 */
static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep,
			    struct bpf_dynptr_kern *ptr)
{
	struct bpf_dynptr_file_impl *impl;
	int err;

	/* No flags are defined yet, so reject any that are set */
	if (flags) {
		err = -EINVAL;
		goto err_null;
	}

	impl = bpf_mem_alloc(&bpf_global_ma, sizeof(*impl));
	if (!impl) {
		err = -ENOMEM;
		goto err_null;
	}

	impl->offset = 0;
	/* The file can change underneath us, so do not restrict the size */
	impl->size = U64_MAX;
	freader_init_from_file(&impl->freader, NULL, 0, file, may_sleep);

	bpf_dynptr_init(ptr, impl, BPF_DYNPTR_TYPE_FILE, 0, 0);
	bpf_dynptr_set_rdonly(ptr);
	return 0;

err_null:
	bpf_dynptr_set_null(ptr);
	return err;
}
/* Create a file dynptr whose reader is set up with may_fault disabled.
 * Returns 0 on success or a negative error from make_file_dynptr().
 */
__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
{
	struct bpf_dynptr_kern *kern_ptr = (struct bpf_dynptr_kern *)ptr__uninit;

	return make_file_dynptr(file, flags, false, kern_ptr);
}
/* Same as bpf_dynptr_from_file(), but the reader is set up with may_fault
 * enabled. Returns 0 on success or a negative error from make_file_dynptr().
 */
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
{
	struct bpf_dynptr_kern *kern_ptr = (struct bpf_dynptr_kern *)ptr__uninit;

	return make_file_dynptr(file, flags, true, kern_ptr);
}
/* Release the state behind a file dynptr and reset the dynptr to null.
 * A dynptr without backing state is left untouched. Always returns 0.
 */
__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
{
	struct bpf_dynptr_kern *kern_ptr = (struct bpf_dynptr_kern *)dynptr;
	struct bpf_dynptr_file_impl *impl = kern_ptr->data;

	if (impl) {
		/* Clean up the reader before freeing the memory that holds it */
		freader_cleanup(&impl->freader);
		bpf_mem_free(&bpf_global_ma, impl);
		bpf_dynptr_set_null(kern_ptr);
	}

	return 0;
}
__bpf_kfunc_end_defs();
static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
@@ -4429,6 +4532,8 @@ BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_dynptr_from_file, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_dynptr_file_discard)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
@@ -4469,7 +4574,7 @@ late_initcall(kfunc_init);
/* Get a pointer to dynptr data up to len bytes for read only access. If
* the dynptr doesn't have continuous data up to len bytes, return NULL.
*/
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len)
{
const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr;
@@ -4480,7 +4585,7 @@ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
* the dynptr doesn't have continuous data up to len bytes, or the dynptr
* is read only, return NULL.
*/
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len)
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len)
{
if (__bpf_dynptr_is_rdonly(ptr))
return NULL;

View File

@@ -500,6 +500,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
return "xdp";
case BPF_DYNPTR_TYPE_SKB_META:
return "skb_meta";
case BPF_DYNPTR_TYPE_FILE:
return "file";
case BPF_DYNPTR_TYPE_INVALID:
return "<invalid>";
default:

View File

@@ -209,8 +209,6 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static void specialize_kfunc(struct bpf_verifier_env *env,
u32 func_id, u16 offset, unsigned long *addr);
static bool is_trusted_reg(const struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
@@ -692,6 +690,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_XDP;
case DYNPTR_TYPE_SKB_META:
return BPF_DYNPTR_TYPE_SKB_META;
case DYNPTR_TYPE_FILE:
return BPF_DYNPTR_TYPE_FILE;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
@@ -710,6 +710,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
return DYNPTR_TYPE_XDP;
case BPF_DYNPTR_TYPE_SKB_META:
return DYNPTR_TYPE_SKB_META;
case BPF_DYNPTR_TYPE_FILE:
return DYNPTR_TYPE_FILE;
default:
return 0;
}
@@ -717,7 +719,7 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
{
return type == BPF_DYNPTR_TYPE_RINGBUF;
return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
}
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
@@ -828,6 +830,15 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
struct bpf_func_state *state = func(env, reg);
int spi, ref_obj_id, i;
/*
* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
* be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
* is safe to do directly.
*/
if (reg->type == CONST_PTR_TO_DYNPTR) {
verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
return -EFAULT;
}
spi = dynptr_get_spi(env, reg);
if (spi < 0)
return spi;
@@ -3113,6 +3124,9 @@ struct bpf_kfunc_btf_tab {
u32 nr_descs;
};
static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
int insn_idx);
static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
{
const struct bpf_kfunc_desc *d0 = a;
@@ -3130,7 +3144,7 @@ static int kfunc_btf_cmp_by_off(const void *a, const void *b)
return d0->offset - d1->offset;
}
static const struct bpf_kfunc_desc *
static struct bpf_kfunc_desc *
find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
{
struct bpf_kfunc_desc desc = {
@@ -3253,12 +3267,12 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
{
const struct btf_type *func, *func_proto;
struct bpf_kfunc_btf_tab *btf_tab;
struct btf_func_model func_model;
struct bpf_kfunc_desc_tab *tab;
struct bpf_prog_aux *prog_aux;
struct bpf_kfunc_desc *desc;
const char *func_name;
struct btf *desc_btf;
unsigned long call_imm;
unsigned long addr;
int err;
@@ -3342,19 +3356,6 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
func_name);
return -EINVAL;
}
specialize_kfunc(env, func_id, offset, &addr);
if (bpf_jit_supports_far_kfunc_call()) {
call_imm = func_id;
} else {
call_imm = BPF_CALL_IMM(addr);
/* Check whether the relative offset overflows desc->imm */
if ((unsigned long)(s32)call_imm != call_imm) {
verbose(env, "address of kernel function %s is out of range\n",
func_name);
return -EINVAL;
}
}
if (bpf_dev_bound_kfunc_id(func_id)) {
err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
@@ -3362,18 +3363,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return err;
}
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
desc->imm = call_imm;
desc->offset = offset;
desc->addr = addr;
err = btf_distill_func_proto(&env->log, desc_btf,
func_proto, func_name,
&desc->func_model);
if (!err)
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_desc_cmp_by_id_off, NULL);
return err;
&func_model);
if (err)
return err;
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
desc->offset = offset;
desc->addr = addr;
desc->func_model = func_model;
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_desc_cmp_by_id_off, NULL);
return 0;
}
static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
@@ -11514,15 +11517,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (meta.release_regno) {
err = -EINVAL;
/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
* be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
* is safe to do directly.
*/
if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
return -EFAULT;
}
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
u32 ref_obj_id = meta.ref_obj_id;
@@ -12290,6 +12285,8 @@ enum special_kfunc_type {
KF_bpf_res_spin_unlock,
KF_bpf_res_spin_lock_irqsave,
KF_bpf_res_spin_unlock_irqrestore,
KF_bpf_dynptr_from_file,
KF_bpf_dynptr_file_discard,
KF___bpf_trap,
KF_bpf_task_work_schedule_signal,
KF_bpf_task_work_schedule_resume,
@@ -12362,6 +12359,8 @@ BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
@@ -13325,6 +13324,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
dynptr_arg_type |= DYNPTR_TYPE_XDP;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
dynptr_arg_type |= DYNPTR_TYPE_FILE;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
dynptr_arg_type |= DYNPTR_TYPE_FILE;
meta->release_regno = regno;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
(dynptr_arg_type & MEM_UNINIT)) {
enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
@@ -14005,12 +14009,18 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/
if (meta.release_regno) {
err = release_reference(env, regs[meta.release_regno].ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
func_name, meta.func_id);
return err;
struct bpf_reg_state *reg = &regs[meta.release_regno];
if (meta.initialized_dynptr.ref_obj_id) {
err = unmark_stack_slots_dynptr(env, reg);
} else {
err = release_reference(env, reg->ref_obj_id);
if (err)
verbose(env, "kfunc %s#%d reference has not been acquired before\n",
func_name, meta.func_id);
}
if (err)
return err;
}
if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
@@ -21860,46 +21870,60 @@ static int fixup_call_args(struct bpf_verifier_env *env)
}
/* replace a generic kfunc with a specialized version if necessary */
static void specialize_kfunc(struct bpf_verifier_env *env,
u32 func_id, u16 offset, unsigned long *addr)
static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
{
struct bpf_prog *prog = env->prog;
bool seen_direct_write;
void *xdp_kfunc;
bool is_rdonly;
u32 func_id = desc->func_id;
u16 offset = desc->offset;
unsigned long addr = desc->addr, call_imm;
if (offset) /* return if module BTF is used */
goto set_imm;
if (bpf_dev_bound_kfunc_id(func_id)) {
xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
if (xdp_kfunc) {
*addr = (unsigned long)xdp_kfunc;
return;
}
if (xdp_kfunc)
addr = (unsigned long)xdp_kfunc;
/* fallback to default kfunc when not supported by netdev */
}
if (offset)
return;
if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
seen_direct_write = env->seen_direct_write;
is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
if (is_rdonly)
*addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
/* restore env->seen_direct_write to its original value, since
* may_access_direct_pkt_data mutates it
*/
env->seen_direct_write = seen_direct_write;
} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
if (bpf_lsm_has_d_inode_locked(prog))
addr = (unsigned long)bpf_set_dentry_xattr_locked;
} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
if (bpf_lsm_has_d_inode_locked(prog))
addr = (unsigned long)bpf_remove_dentry_xattr_locked;
} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
if (!env->insn_aux_data[insn_idx].non_sleepable)
addr = (unsigned long)bpf_dynptr_from_file_sleepable;
}
if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] &&
bpf_lsm_has_d_inode_locked(prog))
*addr = (unsigned long)bpf_set_dentry_xattr_locked;
if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] &&
bpf_lsm_has_d_inode_locked(prog))
*addr = (unsigned long)bpf_remove_dentry_xattr_locked;
set_imm:
if (bpf_jit_supports_far_kfunc_call()) {
call_imm = func_id;
} else {
call_imm = BPF_CALL_IMM(addr);
/* Check whether the relative offset overflows desc->imm */
if ((unsigned long)(s32)call_imm != call_imm) {
verbose(env, "address of kernel func_id %u is out of range\n", func_id);
return -EINVAL;
}
}
desc->imm = call_imm;
desc->addr = addr;
return 0;
}
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
@@ -21922,7 +21946,8 @@ static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
const struct bpf_kfunc_desc *desc;
struct bpf_kfunc_desc *desc;
int err;
if (!insn->imm) {
verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
@@ -21942,6 +21967,10 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EFAULT;
}
err = specialize_kfunc(env, desc, insn_idx);
if (err)
return err;
if (!bpf_jit_supports_far_kfunc_call())
insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)

View File

@@ -3372,13 +3372,13 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc
* direct calls into all the specific callback implementations
* (copy_user_data_sleepable, copy_user_data_nofault, and so on)
*/
static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size,
const void *unsafe_src,
copy_fn_t str_copy_fn,
struct task_struct *tsk)
{
struct bpf_dynptr_kern *dst;
u32 chunk_sz, off;
u64 chunk_sz, off;
void *dst_slice;
int cnt, err;
char buf[256];
@@ -3392,7 +3392,7 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
return -E2BIG;
for (off = 0; off < size; off += chunk_sz - 1) {
chunk_sz = min_t(u32, sizeof(buf), size - off);
chunk_sz = min_t(u64, sizeof(buf), size - off);
/* Expect str_copy_fn to return count of copied bytes, including
* zero terminator. Next iteration increment off by chunk_sz - 1 to
* overwrite NUL.
@@ -3409,14 +3409,14 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
return off;
}
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
u32 size, const void *unsafe_src,
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
u64 size, const void *unsafe_src,
copy_fn_t copy_fn, struct task_struct *tsk)
{
struct bpf_dynptr_kern *dst;
void *dst_slice;
char buf[256];
u32 off, chunk_sz;
u64 off, chunk_sz;
int err;
dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
@@ -3428,7 +3428,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32
return -E2BIG;
for (off = 0; off < size; off += chunk_sz) {
chunk_sz = min_t(u32, sizeof(buf), size - off);
chunk_sz = min_t(u64, sizeof(buf), size - off);
err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
if (err)
return err;
@@ -3514,58 +3514,58 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
return bpf_send_signal_common(sig, type, task, value);
}
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign)
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_nofault, NULL);
}
__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void *unsafe_ptr__ign)
__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
copy_kernel_data_nofault, NULL);
}
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign)
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_str_nofault, NULL);
}
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void *unsafe_ptr__ign)
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
copy_kernel_str_nofault, NULL);
}
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign)
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_sleepable, NULL);
}
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign)
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_str_sleepable, NULL);
}
__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign,
__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
copy_user_data_sleepable, tsk);
}
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
u32 size, const void __user *unsafe_ptr__ign,
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off,
u64 size, const void __user *unsafe_ptr__ign,
struct task_struct *tsk)
{
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,

View File

@@ -11,27 +11,8 @@
#define MAX_PHDR_CNT 256
struct freader {
void *buf;
u32 buf_sz;
int err;
union {
struct {
struct file *file;
struct folio *folio;
void *addr;
loff_t folio_off;
bool may_fault;
};
struct {
const char *data;
u64 data_sz;
};
};
};
static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
struct file *file, bool may_fault)
void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
struct file *file, bool may_fault)
{
memset(r, 0, sizeof(*r));
r->buf = buf;
@@ -40,7 +21,7 @@ static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
r->may_fault = may_fault;
}
static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
{
memset(r, 0, sizeof(*r));
r->data = data;
@@ -92,7 +73,7 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
return 0;
}
static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
{
size_t folio_sz;
@@ -127,18 +108,21 @@ static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
*/
folio_sz = folio_size(r->folio);
if (file_off + sz > r->folio_off + folio_sz) {
int part_sz = r->folio_off + folio_sz - file_off;
u64 part_sz = r->folio_off + folio_sz - file_off, off;
/* copy the part that resides in the current folio */
memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz);
memcpy(r->buf, r->addr + file_off - r->folio_off, part_sz);
off = part_sz;
/* fetch next folio */
r->err = freader_get_folio(r, r->folio_off + folio_sz);
if (r->err)
return NULL;
/* copy the rest of requested data */
memcpy(r->buf + part_sz, r->addr, sz - part_sz);
while (off < sz) {
/* fetch next folio */
r->err = freader_get_folio(r, r->folio_off + folio_sz);
if (r->err)
return NULL;
folio_sz = folio_size(r->folio);
part_sz = min_t(u64, sz - off, folio_sz);
memcpy(r->buf + off, r->addr, part_sz);
off += part_sz;
}
return r->buf;
}
@@ -147,7 +131,7 @@ static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
return r->addr + (file_off - r->folio_off);
}
static void freader_cleanup(struct freader *r)
void freader_cleanup(struct freader *r)
{
if (!r->buf)
return; /* non-file-backed mode */

View File

@@ -5618,7 +5618,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
* long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
* long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5661,7 +5661,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
* long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5671,7 +5671,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5692,7 +5692,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*

View File

@@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
void *buffer, __u32 buffer__szk) __ksym __weak;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
void *buffer, __u64 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
void *buffer, __u32 buffer__szk) __ksym __weak;
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
__u64 buffer__szk) __ksym __weak;
extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description

View File

@@ -0,0 +1,113 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <network_helpers.h>
#include "file_reader.skel.h"
#include "file_reader_fail.skel.h"
#include <dlfcn.h>
#include <sys/mman.h>
/* NOTE(review): not referenced anywhere else in this file - confirm it is needed. */
const char *user_ptr = "hello world";
/* Userspace copy of the first sizeof(file_contents) bytes of /proc/self/exe.
 * Filled by initialize_file_contents() and copied into the BPF object's
 * user_buf by run_test().
 */
char file_contents[256000];
/* Look up the object that contains this very function with dladdr() and
 * return its base address (Dl_info.dli_fbase).
 *
 * Returns NULL, after printing a message to stderr, when dladdr() fails.
 */
void *get_executable_base_addr(void)
{
	Dl_info self;

	if (dladdr((void *)&get_executable_base_addr, &self))
		return self.dli_fbase;

	fprintf(stderr, "dladdr failed\n");
	return NULL;
}
/* Fill file_contents with the first sizeof(file_contents) bytes of
 * /proc/self/exe, then call madvise(MADV_PAGEOUT) page by page over the same
 * number of bytes starting at the page-aligned base address reported by
 * get_executable_base_addr().
 *
 * Returns 0 on success. Returns 1 when the file cannot be opened, when fewer
 * than sizeof(file_contents) bytes could be read, or when the base address
 * lookup fails; returns errno when madvise() fails.
 */
static int initialize_file_contents(void)
{
	int fd, page_sz = sysconf(_SC_PAGESIZE);
	size_t want = sizeof(file_contents);
	ssize_t total = 0, got, pos;
	void *base;

	fd = open("/proc/self/exe", O_RDONLY);
	if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
		return 1;

	/* read() may return short counts, keep going until the buffer is full */
	while (total < want) {
		got = read(fd, file_contents + total, want - total);
		if (!ASSERT_GT(got, 0, "read success"))
			break;
		total += got;
	}
	close(fd);

	if (!ASSERT_EQ(total, sizeof(file_contents), "Read /proc/self/exe\n"))
		return 1;

	base = get_executable_base_addr();
	if (!ASSERT_NEQ(base, NULL, "get executable address"))
		return 1;

	/* page-align base file address */
	base = (void *)((unsigned long)base & ~(page_sz - 1));

	pos = 0;
	while (pos < want) {
		if (!ASSERT_OK(madvise(base + pos, page_sz, MADV_PAGEOUT),
			       "madvise pageout"))
			return errno;
		pos += page_sz;
	}

	return 0;
}
/* Run one file_reader BPF program end to end.
 *
 * Prepares file_contents, opens the skeleton with autoload enabled only for
 * the program named @prog_name, publishes the reference data and our pid in
 * .bss, loads and attaches the object, and then opens /proc/self/exe once.
 * Afterwards the err and run_success variables in .bss are checked.
 */
static void run_test(const char *prog_name)
{
	struct file_reader *skel;
	struct bpf_program *prog;
	int fd;

	if (!ASSERT_OK(initialize_file_contents(), "initialize file contents"))
		return;

	skel = file_reader__open();
	if (!ASSERT_OK_PTR(skel, "file_reader__open"))
		return;

	/* only the requested program is loaded, all others are disabled */
	bpf_object__for_each_program(prog, skel->obj) {
		bool wanted = strcmp(bpf_program__name(prog), prog_name) == 0;

		bpf_program__set_autoload(prog, wanted);
	}

	memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
	skel->bss->pid = getpid();

	if (!ASSERT_OK(file_reader__load(skel), "file_reader__load"))
		goto cleanup;

	if (!ASSERT_OK(file_reader__attach(skel), "file_reader__attach"))
		goto cleanup;

	/* the open itself is what matters; the descriptor is not used */
	fd = open("/proc/self/exe", O_RDONLY);
	if (fd >= 0)
		close(fd);

	ASSERT_EQ(skel->bss->err, 0, "err");
	ASSERT_EQ(skel->bss->run_success, 1, "run_success");

cleanup:
	file_reader__destroy(skel);
}
/* Test entry point: one subtest per positive BPF program, run through
 * run_test(), followed by the "negative" subtest that runs the
 * file_reader_fail programs via RUN_TESTS().
 */
void test_file_reader(void)
{
	static const char * const positive[] = {
		"on_open_expect_fault",
		"on_open_validate_file_read",
	};
	size_t i;

	for (i = 0; i < sizeof(positive) / sizeof(positive[0]); i++) {
		if (test__start_subtest(positive[i]))
			run_test(positive[i]);
	}

	if (test__start_subtest("negative"))
		RUN_TESTS(file_reader_fail);
}

View File

@@ -914,8 +914,8 @@ void *user_ptr;
char expected_str[384];
__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
u32 size, const void *unsafe_ptr);
typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
u64 size, const void *unsafe_ptr);
/* Returns the offset just before the end of the maximum sized xdp fragment.
* Any write larger than 32 bytes will be split between 2 fragments.
@@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx)
return 0;
}
static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
u32 size, const void *unsafe_ptr)
static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
u64 size, const void *unsafe_ptr)
{
struct task_struct *task = bpf_get_current_task_btf();
return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
}
static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
u32 size, const void *unsafe_ptr)
static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
u64 size, const void *unsafe_ptr)
{
struct task_struct *task = bpf_get_current_task_btf();

View File

@@ -0,0 +1,145 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
/* Single-entry array map keyed by int. Its value (struct elem) holds the
 * bpf_task_work that on_open_validate_file_read() passes to
 * bpf_task_work_schedule_signal().
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
} arrmap SEC(".maps");
/* Value type stored in arrmap. */
struct elem {
/* NOTE(review): no program in this file reads or writes this field. */
struct file *file;
/* Task work context handed to bpf_task_work_schedule_signal(). */
struct bpf_task_work tw;
};
/* Reference data that reads through the file dynptr are compared against;
 * not written by any program in this file (presumably filled from user
 * space before load - confirm against the userspace test).
 */
char user_buf[256000];
/* Destination buffer for every bpf_dynptr_read() in this file. */
char tmp_buf[256000];
/* Programs return early unless the current tgid equals this value. */
int pid = 0;
/* Results: err receives a non-zero value on failure, run_success is set to 1
 * when the scenario under test was observed.
 */
int err, run_success = 0;
static int validate_file_read(struct file *file);
static int task_work_callback(struct bpf_map *map, void *key, void *value);
/* lsm/file_open program: creates a file dynptr over the current task's
 * executable and tries to read sizeof(user_buf) bytes from it into tmp_buf.
 * The run counts as successful only when bpf_dynptr_read() returns -EFAULT;
 * any other non-zero result is stored in the global err.
 * Always returns 0.
 */
SEC("lsm/file_open")
int on_open_expect_fault(void *c)
{
struct bpf_dynptr dynptr;
struct file *file;
/* stays 1 if bpf_dynptr_from_file() fails, so that failure reaches err */
int local_err = 1;
__u32 user_buf_sz = sizeof(user_buf);
/* upper 32 bits of pid_tgid are compared with the pid set from user space */
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
file = bpf_get_task_exe_file(bpf_get_current_task_btf());
if (!file)
return 0;
if (bpf_dynptr_from_file(file, 0, &dynptr))
goto out;
local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, 0, 0);
if (local_err == -EFAULT) { /* Expect page fault */
local_err = 0;
run_success = 1;
}
out:
/* reached on both paths: the dynptr is discarded even when creation failed */
bpf_dynptr_file_discard(&dynptr);
if (local_err)
err = local_err;
bpf_put_file(file);
return 0;
}
/* lsm/file_open program: for the process whose tgid matches pid, look up the
 * single arrmap element and schedule task_work_callback() on the current task
 * through bpf_task_work_schedule_signal(). A failed map lookup is reported by
 * setting err to 1. The result of the schedule call is not checked.
 * Always returns 0.
 */
SEC("lsm/file_open")
int on_open_validate_file_read(void *c)
{
	struct task_struct *cur = bpf_get_current_task_btf();
	struct elem *slot;
	int idx = 0;

	if ((bpf_get_current_pid_tgid() >> 32) != pid)
		return 0;

	slot = bpf_map_lookup_elem(&arrmap, &idx);
	if (slot)
		bpf_task_work_schedule_signal(cur, &slot->tw, &arrmap,
					      task_work_callback, NULL);
	else
		err = 1;

	return 0;
}
/* Task work callback scheduled by on_open_validate_file_read().
 * Called in a sleepable context, read 256K bytes, cross check with user space
 * read data: validate_file_read() runs on the current task's executable, its
 * result is stored in err, and run_success is set when that result is 0.
 * Always returns 0.
 */
static int task_work_callback(struct bpf_map *map, void *key, void *value)
{
	struct file *exe = bpf_get_task_exe_file(bpf_get_current_task_btf());

	if (!exe)
		return 0;

	err = validate_file_read(exe);
	if (err == 0)
		run_success = 1;

	bpf_put_file(exe);
	return 0;
}
/* Read len bytes at offset off from dynptr ptr into tmp_buf and compare them
 * byte by byte with user_buf (the parameter, which shadows the global of the
 * same name).
 *
 * Returns 0 when the read succeeds and all bytes match, 1 when
 * bpf_dynptr_read() fails or any byte differs.
 */
static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len)
{
int i;
if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0))
return 1;
/* Verify file contents read from BPF is the same as the one read from userspace */
bpf_for(i, 0, len)
{
if (tmp_buf[i] != user_buf[i])
return 1;
}
return 0;
}
/* Create a file dynptr over file and run a series of reads against it,
 * comparing each with the matching part of user_buf.
 *
 * Every step uses "loc_err ?: step", so a step runs only while loc_err is
 * still 0 and the first non-zero result is kept. The dynptr is discarded on
 * every path, including failure of bpf_dynptr_from_file().
 *
 * Returns 0 when all steps behave as expected, non-zero otherwise.
 */
static int validate_file_read(struct file *file)
{
struct bpf_dynptr dynptr;
/* stays 1 if bpf_dynptr_from_file() fails */
int loc_err = 1, off;
__u32 user_buf_sz = sizeof(user_buf);
if (bpf_dynptr_from_file(file, 0, &dynptr))
goto cleanup;
/* whole buffer starting at offset 0 */
loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz);
/* everything except the first byte */
off = 1;
loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
/* only the last byte */
off = user_buf_sz - 1;
loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
/* Read 100 bytes at a fixed offset of 4097 */
off = 4097;
loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100);
/* Adjust dynptr to the range [off, off + 1), verify read */
loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1);
loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1);
/* Can't read more than 1 byte: the step yields 1 (error) if the read succeeds */
loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0;
/* Can't read at offset 1 of the adjusted dynptr either */
loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0;
cleanup:
bpf_dynptr_file_discard(&dynptr);
return loc_err;
}

View File

@@ -0,0 +1,52 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
/* Receives the result of bpf_dynptr_from_file() in on_nanosleep_unreleased_ref(). */
int err;
/* NOTE(review): not referenced by any program in this file. */
void *user_ptr;
/* Negative test: takes the current task's executable file and creates a file
 * dynptr over it, then returns without calling bpf_put_file() or
 * bpf_dynptr_file_discard(). Loading is expected to fail with a message
 * containing "Unreleased reference id=".
 * NOTE(review): despite the "on_nanosleep" name, the program is placed in the
 * lsm/file_open section.
 */
SEC("lsm/file_open")
__failure
__msg("Unreleased reference id=")
int on_nanosleep_unreleased_ref(void *ctx)
{
struct task_struct *task = bpf_get_current_task_btf();
struct file *file = bpf_get_task_exe_file(task);
struct bpf_dynptr dynptr;
if (!file)
return 0;
err = bpf_dynptr_from_file(file, 0, &dynptr);
/* deliberately no discard/put before returning */
return err ? 1 : 0;
}
/* Negative test: passes a dynptr created by bpf_dynptr_from_xdp() to
 * bpf_dynptr_file_discard(). Loading is expected to fail with
 * "Expected a dynptr of type file as arg #0".
 */
SEC("xdp")
__failure
__msg("Expected a dynptr of type file as arg #0")
int xdp_wrong_dynptr_type(struct xdp_md *xdp)
{
struct bpf_dynptr dynptr;
bpf_dynptr_from_xdp(xdp, 0, &dynptr);
bpf_dynptr_file_discard(&dynptr);
return 0;
}
/* Negative test: passes a dynptr that was never initialized to
 * bpf_dynptr_file_discard(). Loading is expected to fail with
 * "Expected an initialized dynptr as arg #0".
 */
SEC("xdp")
__failure
__msg("Expected an initialized dynptr as arg #0")
int xdp_no_dynptr_type(struct xdp_md *xdp)
{
struct bpf_dynptr dynptr;
bpf_dynptr_file_discard(&dynptr);
return 0;
}

View File

@@ -12,11 +12,6 @@
#define IP_OFFSET 0x1FFF
#define NEXTHDR_FRAGMENT 44
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
volatile int shootdowns = 0;
static bool is_frag_v4(struct iphdr *iph)

View File

@@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
return NF_ACCEPT;
}
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
SEC("netfilter")
__description("netfilter test prog with skb and state read access")
__success __failure_unpriv