From 1afaf661b20a075a22eef3332db187001a334b81 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 4 Dec 2016 23:19:39 +0100 Subject: [PATCH 1/3] bpf: remove type arg from __is_valid_{,xdp_}access Commit d691f9e8d440 ("bpf: allow programs to write to certain skb fields") pushed access type check outside of __is_valid_access() to have different restrictions for socket filters and tc programs. type is thus not used anymore within __is_valid_access() and should be removed as a function argument. Same for __is_valid_xdp_access() introduced by 6a773a15a1e8 ("bpf: add XDP prog type for early driver filter"). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 56b43587d200..b751202e12f8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2748,7 +2748,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static bool __is_valid_access(int off, int size, enum bpf_access_type type) +static bool __is_valid_access(int off, int size) { if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2782,7 +2782,7 @@ static bool sk_filter_is_valid_access(int off, int size, } } - return __is_valid_access(off, size, type); + return __is_valid_access(off, size); } static bool lwt_is_valid_access(int off, int size, @@ -2815,7 +2815,7 @@ static bool lwt_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size, type); + return __is_valid_access(off, size); } static bool sock_filter_is_valid_access(int off, int size, @@ -2833,11 +2833,9 @@ static bool sock_filter_is_valid_access(int off, int size, if (off < 0 || off + size > sizeof(struct bpf_sock)) return false; - /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) return false; @@ -2910,11 +2908,10 @@ static bool tc_cls_act_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size, type); + return __is_valid_access(off, size); } -static bool __is_valid_xdp_access(int off, int size, - enum bpf_access_type type) +static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) return false; @@ -2942,7 +2939,7 @@ static bool xdp_is_valid_access(int off, int size, break; } - return __is_valid_xdp_access(off, size, type); + return __is_valid_xdp_access(off, size); } void bpf_warn_invalid_xdp_action(u32 act) From 8d829bdb97dc3a0c9c8090b9b168ca46ea99c8d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 4 Dec 2016 23:19:40 +0100 Subject: [PATCH 2/3] bpf, cls: consolidate prog deletion path Commit 18cdb37ebf4c ("net: sched: do not use tcf_proto 'tp' argument from call_rcu") removed the last usage of tp from cls_bpf_delete_prog(), so also remove it from the function as argument to not give a wrong impression. tp is illegal to access from this callback, since it could already have been freed. Refactor the deletion code a bit, so that cls_bpf_destroy() can call into the same code for prog deletion as cls_bpf_delete() op, instead of having it unnecessarily duplicated. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c37aa8b77fb5..f70e03d2d2c8 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -241,7 +241,7 @@ static int cls_bpf_init(struct tcf_proto *tp) return 0; } -static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) +static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); @@ -255,22 +255,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) kfree(prog); } -static void __cls_bpf_delete_prog(struct rcu_head *rcu) +static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); +} - cls_bpf_delete_prog(prog->tp, prog); +static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) +{ + cls_bpf_stop_offload(tp, prog); + list_del_rcu(&prog->link); + tcf_unbind_filter(tp, &prog->res); + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); } static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { - struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; - - cls_bpf_stop_offload(tp, prog); - list_del_rcu(&prog->link); - tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, __cls_bpf_delete_prog); - + __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg); return 0; } @@ -282,12 +282,8 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) if (!force && !list_empty(&head->plist)) return false; - list_for_each_entry_safe(prog, tmp, &head->plist, link) { - cls_bpf_stop_offload(tp, prog); - list_del_rcu(&prog->link); - tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, __cls_bpf_delete_prog); - } + list_for_each_entry_safe(prog, tmp, &head->plist, link) + __cls_bpf_delete(tp, prog); kfree_rcu(head, rcu); return true; @@ -511,14 +507,14 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, ret = cls_bpf_offload(tp, prog, oldprog); if (ret) { - cls_bpf_delete_prog(tp, prog); + __cls_bpf_delete_prog(prog); return ret; } if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); - call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); + call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); } From 7bd509e311f408f7a5132fcdde2069af65fa05ae Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 4 Dec 2016 23:19:41 +0100 Subject: [PATCH 3/3] bpf: add prog_digest and expose it via fdinfo/netlink When loading a BPF program via bpf(2), calculate the digest over the program's instruction stream and store it in struct bpf_prog's digest member. This is done at a point in time before any instructions are rewritten by the verifier. Any unstable map file descriptor number part of the imm field will be zeroed for the hash. fdinfo example output for progs: # cat /proc/1590/fdinfo/5 pos: 0 flags: 02000002 mnt_id: 11 prog_type: 1 prog_jited: 1 prog_digest: b27e8b06da22707513aa97363dfb11c7c3675d28 memlock: 4096 When programs are pinned and retrieved by an ELF loader, the loader can check the program's digest through fdinfo and compare it against one that was generated over the ELF file's program section to see if the program needs to be reloaded. Furthermore, this can also be exposed through other means such as netlink in case of a tc cls/act dump (or xdp in future), but also through tracepoints or other facilities to identify the program. Other than that, the digest can also serve as a base name for the work in progress kallsyms support of programs. The digest doesn't depend/select the crypto layer, since we need to keep dependencies to a minimum. iproute2 will get support for this facility. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/linux/filter.h | 7 +++- include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/tc_act/tc_bpf.h | 1 + kernel/bpf/core.c | 65 ++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 24 ++++++++++- kernel/bpf/verifier.c | 2 + net/sched/act_bpf.c | 9 +++++ net/sched/cls_bpf.c | 8 ++++ 9 files changed, 116 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 69d0a7f12a3b..8796ff03f472 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,6 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +void bpf_prog_calc_digest(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index 97338134398f..f078d2b1cff6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -56,6 +57,9 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +/* Maximum BPF program size in bytes. */ +#define MAX_BPF_SIZE (BPF_MAXINSNS * sizeof(struct bpf_insn)) + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -404,8 +408,9 @@ struct bpf_prog { cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); - u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ + u32 len; /* Number of filter blocks */ + u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 86786d45ee66..1adc0b654996 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,6 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, + TCA_BPF_DIGEST, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 063d9d465119..a6b88a6f7f71 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,6 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, + TCA_ACT_BPF_DIGEST, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 82a04143368e..bdcc9f4ba767 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -136,6 +136,71 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } +#define SHA_BPF_RAW_SIZE \ + round_up(MAX_BPF_SIZE + sizeof(__be64) + 1, SHA_MESSAGE_BYTES) + +/* Called under verifier mutex. */ +void bpf_prog_calc_digest(struct bpf_prog *fp) +{ + const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); + static u32 ws[SHA_WORKSPACE_WORDS]; + static u8 raw[SHA_BPF_RAW_SIZE]; + struct bpf_insn *dst = (void *)raw; + u32 i, bsize, psize, blocks; + bool was_ld_map; + u8 *todo = raw; + __be32 *result; + __be64 *bits; + + sha_init(fp->digest); + memset(ws, 0, sizeof(ws)); + + /* We need to take out the map fd for the digest calculation + * since they are unstable from user space side. + */ + for (i = 0, was_ld_map = false; i < fp->len; i++) { + dst[i] = fp->insnsi[i]; + if (!was_ld_map && + dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && + dst[i].src_reg == BPF_PSEUDO_MAP_FD) { + was_ld_map = true; + dst[i].imm = 0; + } else if (was_ld_map && + dst[i].code == 0 && + dst[i].dst_reg == 0 && + dst[i].src_reg == 0 && + dst[i].off == 0) { + was_ld_map = false; + dst[i].imm = 0; + } else { + was_ld_map = false; + } + } + + psize = fp->len * sizeof(struct bpf_insn); + memset(&raw[psize], 0, sizeof(raw) - psize); + raw[psize++] = 0x80; + + bsize = round_up(psize, SHA_MESSAGE_BYTES); + blocks = bsize / SHA_MESSAGE_BYTES; + if (bsize - psize >= sizeof(__be64)) { + bits = (__be64 *)(todo + bsize - sizeof(__be64)); + } else { + bits = (__be64 *)(todo + bsize + bits_offset); + blocks++; + } + *bits = cpu_to_be64((psize - 1) << 3); + + while (blocks--) { + sha_transform(fp->digest, todo, ws); + todo += SHA_MESSAGE_BYTES; + } + + result = (__force __be32 *)fp->digest; + for (i = 0; i < SHA_DIGEST_WORDS; i++) + result[i] = cpu_to_be32(fp->digest[i]); +} + static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_JMP && diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85af86c496cd..c0d2b423ce93 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -662,8 +662,30 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +#ifdef CONFIG_PROC_FS +static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_prog *prog = filp->private_data; + char prog_digest[sizeof(prog->digest) * 2 + 1] = { }; + + bin2hex(prog_digest, prog->digest, sizeof(prog->digest)); + seq_printf(m, + "prog_type:\t%u\n" + "prog_jited:\t%u\n" + "prog_digest:\t%s\n" + "memlock:\t%llu\n", + prog->type, + prog->jited, + prog_digest, + prog->pages * 1ULL << PAGE_SHIFT); +} +#endif + static const struct file_operations bpf_prog_fops = { - .release = bpf_prog_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_prog_show_fdinfo, +#endif + .release = bpf_prog_release, }; int bpf_prog_new_fd(struct bpf_prog *prog) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 38d05da84a49..cb37339ca0da 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3176,6 +3176,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) log_level = 0; } + bpf_prog_calc_digest(env->prog); + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 84c1d2da4f8b..1c60317f0121 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -117,10 +117,19 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, struct sk_buff *skb) { + struct nlattr *nla; + if (prog->bpf_name && nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; + nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST, + sizeof(prog->filter->digest)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + return 0; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f70e03d2d2c8..adc776048d1a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -549,10 +549,18 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog, static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, struct sk_buff *skb) { + struct nlattr *nla; + if (prog->bpf_name && nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; + nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + return 0; }