mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-09 05:31:17 -04:00
Merge branch 'allow-mmap-of-sys-kernel-btf-vmlinux'
Lorenz Bauer says: ==================== Allow mmap of /sys/kernel/btf/vmlinux I'd like to cut down the memory usage of parsing vmlinux BTF in ebpf-go. With some upcoming changes the library is sitting at 5MiB for a parse. Most of that memory is simply copying the BTF blob into user space. By allowing vmlinux BTF to be mmapped read-only into user space I can cut memory usage by about 75%. Signed-off-by: Lorenz Bauer <lmb@isovalent.com> --- Changes in v5: - Fix error return of btf_parse_raw_mmap (Andrii) - Link to v4: https://lore.kernel.org/r/20250510-vmlinux-mmap-v4-0-69e424b2a672@isovalent.com Changes in v4: - Go back to remap_pfn_range for aarch64 compat - Dropped btf_new_no_copy (Andrii) - Fixed nits in selftests (Andrii) - Clearer error handling in the mmap handler (Andrii) - Fixed build on s390 - Link to v3: https://lore.kernel.org/r/20250505-vmlinux-mmap-v3-0-5d53afa060e8@isovalent.com Changes in v3: - Remove slightly confusing calculation of trailing (Alexei) - Use vm_insert_page (Alexei) - Simplified libbpf code - Link to v2: https://lore.kernel.org/r/20250502-vmlinux-mmap-v2-0-95c271434519@isovalent.com Changes in v2: - Use btf__new in selftest - Avoid vm_iomap_memory in btf_vmlinux_mmap - Add VM_DONTDUMP - Add support to libbpf - Link to v1: https://lore.kernel.org/r/20250501-vmlinux-mmap-v1-0-aa2724572598@isovalent.com --- ==================== Link: https://patch.msgid.link/20250520-vmlinux-mmap-v5-0-e8c941acc414@isovalent.com Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_INFO_BTF
|
||||
#define BTF \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
|
||||
BOUNDED_SECTION_BY(.BTF, _BTF) \
|
||||
} \
|
||||
. = ALIGN(4); \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
|
||||
*(.BTF_ids) \
|
||||
}
|
||||
|
||||
@@ -7,14 +7,46 @@
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/btf.h>
|
||||
|
||||
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
|
||||
extern char __start_BTF[];
|
||||
extern char __stop_BTF[];
|
||||
|
||||
static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
|
||||
const struct bin_attribute *attr,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
|
||||
size_t vm_size = vma->vm_end - vma->vm_start;
|
||||
phys_addr_t addr = virt_to_phys(__start_BTF);
|
||||
unsigned long pfn = addr >> PAGE_SHIFT;
|
||||
|
||||
if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
|
||||
return -EINVAL;
|
||||
|
||||
if (vma->vm_pgoff)
|
||||
return -EINVAL;
|
||||
|
||||
if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
|
||||
return -EACCES;
|
||||
|
||||
if (pfn + pages < pfn)
|
||||
return -EINVAL;
|
||||
|
||||
if ((vm_size >> PAGE_SHIFT) > pages)
|
||||
return -EINVAL;
|
||||
|
||||
vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
|
||||
return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
|
||||
}
|
||||
|
||||
/*
 * Binary sysfs attribute named "vmlinux": world-readable (0444), served by
 * the generic simple read helper and mappable through
 * btf_sysfs_vmlinux_mmap().
 */
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
	.attr = {
		.name = "vmlinux",
		.mode = 0444,
	},
	.mmap = btf_sysfs_vmlinux_mmap,
	.read_new = sysfs_bin_attr_simple_read,
};
|
||||
|
||||
struct kobject *btf_kobj;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/btf.h>
|
||||
@@ -120,6 +121,9 @@ struct btf {
|
||||
/* whether base_btf should be freed in btf_free for this instance */
|
||||
bool owns_base;
|
||||
|
||||
/* whether raw_data is a (read-only) mmap */
|
||||
bool raw_data_is_mmap;
|
||||
|
||||
/* BTF object FD, if loaded into kernel */
|
||||
int fd;
|
||||
|
||||
@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
|
||||
return (void *)btf->hdr != btf->raw_data;
|
||||
}
|
||||
|
||||
static void btf_free_raw_data(struct btf *btf)
|
||||
{
|
||||
if (btf->raw_data_is_mmap) {
|
||||
munmap(btf->raw_data, btf->raw_size);
|
||||
btf->raw_data_is_mmap = false;
|
||||
} else {
|
||||
free(btf->raw_data);
|
||||
}
|
||||
btf->raw_data = NULL;
|
||||
}
|
||||
|
||||
void btf__free(struct btf *btf)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(btf))
|
||||
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
|
||||
free(btf->types_data);
|
||||
strset__free(btf->strs_set);
|
||||
}
|
||||
free(btf->raw_data);
|
||||
btf_free_raw_data(btf);
|
||||
free(btf->raw_data_swapped);
|
||||
free(btf->type_offs);
|
||||
if (btf->owns_base)
|
||||
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
|
||||
return libbpf_ptr(btf_new_empty(base_btf));
|
||||
}
|
||||
|
||||
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
|
||||
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
|
||||
{
|
||||
struct btf *btf;
|
||||
int err;
|
||||
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
|
||||
btf->start_str_off = base_btf->hdr->str_len;
|
||||
}
|
||||
|
||||
btf->raw_data = malloc(size);
|
||||
if (!btf->raw_data) {
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
if (is_mmap) {
|
||||
btf->raw_data = (void *)data;
|
||||
btf->raw_data_is_mmap = true;
|
||||
} else {
|
||||
btf->raw_data = malloc(size);
|
||||
if (!btf->raw_data) {
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
memcpy(btf->raw_data, data, size);
|
||||
}
|
||||
memcpy(btf->raw_data, data, size);
|
||||
|
||||
btf->raw_size = size;
|
||||
|
||||
btf->hdr = btf->raw_data;
|
||||
@@ -1083,12 +1104,12 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
|
||||
|
||||
struct btf *btf__new(const void *data, __u32 size)
|
||||
{
|
||||
return libbpf_ptr(btf_new(data, size, NULL));
|
||||
return libbpf_ptr(btf_new(data, size, NULL, false));
|
||||
}
|
||||
|
||||
struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
|
||||
{
|
||||
return libbpf_ptr(btf_new(data, size, base_btf));
|
||||
return libbpf_ptr(btf_new(data, size, base_btf, false));
|
||||
}
|
||||
|
||||
struct btf_elf_secs {
|
||||
@@ -1209,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
|
||||
|
||||
if (secs.btf_base_data) {
|
||||
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
|
||||
NULL);
|
||||
NULL, false);
|
||||
if (IS_ERR(dist_base_btf)) {
|
||||
err = PTR_ERR(dist_base_btf);
|
||||
dist_base_btf = NULL;
|
||||
@@ -1218,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
|
||||
}
|
||||
|
||||
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
|
||||
dist_base_btf ?: base_btf);
|
||||
dist_base_btf ?: base_btf, false);
|
||||
if (IS_ERR(btf)) {
|
||||
err = PTR_ERR(btf);
|
||||
goto done;
|
||||
@@ -1335,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
|
||||
}
|
||||
|
||||
/* finally parse BTF data */
|
||||
btf = btf_new(data, sz, base_btf);
|
||||
btf = btf_new(data, sz, base_btf, false);
|
||||
|
||||
err_out:
|
||||
free(data);
|
||||
@@ -1354,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
|
||||
return libbpf_ptr(btf_parse_raw(path, base_btf));
|
||||
}
|
||||
|
||||
static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
|
||||
{
|
||||
struct stat st;
|
||||
void *data;
|
||||
struct btf *btf;
|
||||
int fd, err;
|
||||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return libbpf_err_ptr(-errno);
|
||||
|
||||
if (fstat(fd, &st) < 0) {
|
||||
err = -errno;
|
||||
close(fd);
|
||||
return libbpf_err_ptr(err);
|
||||
}
|
||||
|
||||
data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
err = -errno;
|
||||
close(fd);
|
||||
|
||||
if (data == MAP_FAILED)
|
||||
return libbpf_err_ptr(err);
|
||||
|
||||
btf = btf_new(data, st.st_size, base_btf, true);
|
||||
if (IS_ERR(btf))
|
||||
munmap(data, st.st_size);
|
||||
|
||||
return btf;
|
||||
}
|
||||
|
||||
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
|
||||
{
|
||||
struct btf *btf;
|
||||
@@ -1618,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
|
||||
goto exit_free;
|
||||
}
|
||||
|
||||
btf = btf_new(ptr, btf_info.btf_size, base_btf);
|
||||
btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
|
||||
|
||||
exit_free:
|
||||
free(ptr);
|
||||
@@ -1658,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
|
||||
|
||||
static void btf_invalidate_raw_data(struct btf *btf)
|
||||
{
|
||||
if (btf->raw_data) {
|
||||
free(btf->raw_data);
|
||||
btf->raw_data = NULL;
|
||||
}
|
||||
if (btf->raw_data)
|
||||
btf_free_raw_data(btf);
|
||||
if (btf->raw_data_swapped) {
|
||||
free(btf->raw_data_swapped);
|
||||
btf->raw_data_swapped = NULL;
|
||||
@@ -5331,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
|
||||
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
|
||||
sysfs_btf_path);
|
||||
} else {
|
||||
btf = btf__parse(sysfs_btf_path, NULL);
|
||||
btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
|
||||
if (IS_ERR(btf))
|
||||
btf = btf__parse(sysfs_btf_path, NULL);
|
||||
|
||||
if (!btf) {
|
||||
err = -errno;
|
||||
pr_warn("failed to read kernel BTF from '%s': %s\n",
|
||||
|
||||
81
tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
Normal file
81
tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
Normal file
@@ -0,0 +1,81 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
/* Copyright (c) 2025 Isovalent */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include <bpf/btf.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static void test_btf_mmap_sysfs(const char *path, struct btf *base)
|
||||
{
|
||||
struct stat st;
|
||||
__u64 btf_size, end;
|
||||
void *raw_data = NULL;
|
||||
int fd = -1;
|
||||
long page_size;
|
||||
struct btf *btf = NULL;
|
||||
|
||||
page_size = sysconf(_SC_PAGESIZE);
|
||||
if (!ASSERT_GE(page_size, 0, "get_page_size"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_OK(stat(path, &st), "stat_btf"))
|
||||
goto cleanup;
|
||||
|
||||
btf_size = st.st_size;
|
||||
end = (btf_size + page_size - 1) / page_size * page_size;
|
||||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (!ASSERT_GE(fd, 0, "open_btf"))
|
||||
goto cleanup;
|
||||
|
||||
raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
|
||||
goto cleanup;
|
||||
|
||||
raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
|
||||
goto cleanup;
|
||||
|
||||
raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
|
||||
goto cleanup;
|
||||
|
||||
raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
|
||||
"mprotect_writable"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
|
||||
"mprotect_executable"))
|
||||
goto cleanup;
|
||||
|
||||
/* Check padding is zeroed */
|
||||
for (int i = btf_size; i < end; i++) {
|
||||
if (((__u8 *)raw_data)[i] != 0) {
|
||||
PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
btf = btf__new_split(raw_data, btf_size, base);
|
||||
if (!ASSERT_OK_PTR(btf, "parse_btf"))
|
||||
goto cleanup;
|
||||
|
||||
cleanup:
|
||||
btf__free(btf);
|
||||
if (raw_data && raw_data != MAP_FAILED)
|
||||
munmap(raw_data, btf_size);
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
void test_btf_sysfs(void)
|
||||
{
|
||||
test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
|
||||
}
|
||||
Reference in New Issue
Block a user