mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-23 22:21:38 -05:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Cross-merge bpf fixes after downstream PR.

No conflicts.

Adjacent changes in:
  include/linux/bpf.h
  include/uapi/linux/bpf.h
  kernel/bpf/btf.c
  kernel/bpf/helpers.c
  kernel/bpf/syscall.c
  kernel/bpf/verifier.c
  kernel/trace/bpf_trace.c
  mm/slab_common.c
  tools/include/uapi/linux/bpf.h
  tools/testing/selftests/bpf/Makefile

Link: https://lore.kernel.org/all/20241024215724.60017-1-daniel@iogearbox.net/
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -339,10 +339,6 @@ BTF_ID(func, bpf_lsm_path_chmod)
|
||||
BTF_ID(func, bpf_lsm_path_chown)
|
||||
#endif /* CONFIG_SECURITY_PATH */
|
||||
|
||||
#ifdef CONFIG_KEYS
|
||||
BTF_ID(func, bpf_lsm_key_free)
|
||||
#endif /* CONFIG_KEYS */
|
||||
|
||||
BTF_ID(func, bpf_lsm_mmap_file)
|
||||
BTF_ID(func, bpf_lsm_netlink_send)
|
||||
BTF_ID(func, bpf_lsm_path_notify)
|
||||
|
||||
@@ -3528,7 +3528,7 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
|
||||
* (i + 1) * elem_size
|
||||
* where i is the repeat index and elem_size is the size of an element.
|
||||
*/
|
||||
static int btf_repeat_fields(struct btf_field_info *info,
|
||||
static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
|
||||
u32 field_cnt, u32 repeat_cnt, u32 elem_size)
|
||||
{
|
||||
u32 i, j;
|
||||
@@ -3549,6 +3549,12 @@ static int btf_repeat_fields(struct btf_field_info *info,
|
||||
}
|
||||
}
|
||||
|
||||
/* The type of struct size or variable size is u32,
|
||||
* so the multiplication will not overflow.
|
||||
*/
|
||||
if (field_cnt * (repeat_cnt + 1) > info_cnt)
|
||||
return -E2BIG;
|
||||
|
||||
cur = field_cnt;
|
||||
for (i = 0; i < repeat_cnt; i++) {
|
||||
memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0]));
|
||||
@@ -3593,7 +3599,7 @@ static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *
|
||||
info[i].off += off;
|
||||
|
||||
if (nelems > 1) {
|
||||
err = btf_repeat_fields(info, ret, nelems - 1, t->size);
|
||||
err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size);
|
||||
if (err == 0)
|
||||
ret *= nelems;
|
||||
else
|
||||
@@ -3688,10 +3694,10 @@ static int btf_find_field_one(const struct btf *btf,
|
||||
|
||||
if (ret == BTF_FIELD_IGNORE)
|
||||
return 0;
|
||||
if (nelems > info_cnt)
|
||||
if (!info_cnt)
|
||||
return -E2BIG;
|
||||
if (nelems > 1) {
|
||||
ret = btf_repeat_fields(info, 1, nelems - 1, sz);
|
||||
ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
@@ -8985,6 +8991,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
|
||||
if (!type) {
|
||||
bpf_log(ctx->log, "relo #%u: bad type id %u\n",
|
||||
relo_idx, relo->type_id);
|
||||
kfree(specs);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
@@ -333,9 +333,11 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
|
||||
|
||||
static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
|
||||
struct xdp_frame **frames, int n,
|
||||
struct net_device *dev)
|
||||
struct net_device *tx_dev,
|
||||
struct net_device *rx_dev)
|
||||
{
|
||||
struct xdp_txq_info txq = { .dev = dev };
|
||||
struct xdp_txq_info txq = { .dev = tx_dev };
|
||||
struct xdp_rxq_info rxq = { .dev = rx_dev };
|
||||
struct xdp_buff xdp;
|
||||
int i, nframes = 0;
|
||||
|
||||
@@ -346,6 +348,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
|
||||
|
||||
xdp_convert_frame_to_buff(xdpf, &xdp);
|
||||
xdp.txq = &txq;
|
||||
xdp.rxq = &rxq;
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||
switch (act) {
|
||||
@@ -360,7 +363,7 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
|
||||
bpf_warn_invalid_xdp_action(NULL, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_ABORTED:
|
||||
trace_xdp_exception(dev, xdp_prog, act);
|
||||
trace_xdp_exception(tx_dev, xdp_prog, act);
|
||||
fallthrough;
|
||||
case XDP_DROP:
|
||||
xdp_return_frame_rx_napi(xdpf);
|
||||
@@ -388,7 +391,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
|
||||
}
|
||||
|
||||
if (bq->xdp_prog) {
|
||||
to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
|
||||
to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev, bq->dev_rx);
|
||||
if (!to_send)
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -111,7 +111,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = {
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
|
||||
.arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
|
||||
@@ -124,7 +124,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
|
||||
.arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
|
||||
@@ -538,7 +538,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
|
||||
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
||||
.arg2_type = ARG_CONST_SIZE,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
|
||||
.arg4_size = sizeof(s64),
|
||||
};
|
||||
|
||||
@@ -566,7 +566,7 @@ const struct bpf_func_proto bpf_strtoul_proto = {
|
||||
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
||||
.arg2_type = ARG_CONST_SIZE,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
|
||||
.arg4_size = sizeof(u64),
|
||||
};
|
||||
|
||||
@@ -1742,7 +1742,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
|
||||
|
||||
@@ -880,7 +880,7 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
const struct btf_type *enum_t;
|
||||
const char *enum_pfx;
|
||||
u64 *delegate_msk, msk = 0;
|
||||
char *p;
|
||||
char *p, *str;
|
||||
int val;
|
||||
|
||||
/* ignore errors, fallback to hex */
|
||||
@@ -911,7 +911,8 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
while ((p = strsep(&param->string, ":"))) {
|
||||
str = param->string;
|
||||
while ((p = strsep(&str, ":"))) {
|
||||
if (strcmp(p, "any") == 0) {
|
||||
msk |= ~0ULL;
|
||||
} else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) {
|
||||
|
||||
@@ -688,8 +688,7 @@ static void print_reg_state(struct bpf_verifier_env *env,
|
||||
if (t == SCALAR_VALUE && reg->precise)
|
||||
verbose(env, "P");
|
||||
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
|
||||
/* reg->off should be 0 for SCALAR_VALUE */
|
||||
verbose_snum(env, reg->var_off.value + reg->off);
|
||||
verbose_snum(env, reg->var_off.value);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ struct bpf_ringbuf {
|
||||
u64 mask;
|
||||
struct page **pages;
|
||||
int nr_pages;
|
||||
spinlock_t spinlock ____cacheline_aligned_in_smp;
|
||||
raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
|
||||
/* For user-space producer ring buffers, an atomic_t busy bit is used
|
||||
* to synchronize access to the ring buffers in the kernel, rather than
|
||||
* the spinlock that is used for kernel-producer ring buffers. This is
|
||||
@@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
|
||||
if (!rb)
|
||||
return NULL;
|
||||
|
||||
spin_lock_init(&rb->spinlock);
|
||||
raw_spin_lock_init(&rb->spinlock);
|
||||
atomic_set(&rb->busy, 0);
|
||||
init_waitqueue_head(&rb->waitq);
|
||||
init_irq_work(&rb->work, bpf_ringbuf_notify);
|
||||
@@ -421,10 +421,10 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
|
||||
cons_pos = smp_load_acquire(&rb->consumer_pos);
|
||||
|
||||
if (in_nmi()) {
|
||||
if (!spin_trylock_irqsave(&rb->spinlock, flags))
|
||||
if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
|
||||
return NULL;
|
||||
} else {
|
||||
spin_lock_irqsave(&rb->spinlock, flags);
|
||||
raw_spin_lock_irqsave(&rb->spinlock, flags);
|
||||
}
|
||||
|
||||
pend_pos = rb->pending_pos;
|
||||
@@ -450,7 +450,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
|
||||
*/
|
||||
if (new_prod_pos - cons_pos > rb->mask ||
|
||||
new_prod_pos - pend_pos > rb->mask) {
|
||||
spin_unlock_irqrestore(&rb->spinlock, flags);
|
||||
raw_spin_unlock_irqrestore(&rb->spinlock, flags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -462,7 +462,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
|
||||
/* pairs with consumer's smp_load_acquire() */
|
||||
smp_store_release(&rb->producer_pos, new_prod_pos);
|
||||
|
||||
spin_unlock_irqrestore(&rb->spinlock, flags);
|
||||
raw_spin_unlock_irqrestore(&rb->spinlock, flags);
|
||||
|
||||
return (void *)hdr + BPF_RINGBUF_HDR_SZ;
|
||||
}
|
||||
@@ -632,7 +632,7 @@ const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT | MEM_WRITE,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
|
||||
|
||||
@@ -3169,13 +3169,17 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||
{
|
||||
const struct bpf_link *link = filp->private_data;
|
||||
const struct bpf_prog *prog = link->prog;
|
||||
enum bpf_link_type type = link->type;
|
||||
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
|
||||
|
||||
seq_printf(m,
|
||||
"link_type:\t%s\n"
|
||||
"link_id:\t%u\n",
|
||||
bpf_link_type_strs[link->type],
|
||||
link->id);
|
||||
if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) {
|
||||
seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]);
|
||||
} else {
|
||||
WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type);
|
||||
seq_printf(m, "link_type:\t<%u>\n", type);
|
||||
}
|
||||
seq_printf(m, "link_id:\t%u\n", link->id);
|
||||
|
||||
if (prog) {
|
||||
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
|
||||
seq_printf(m,
|
||||
@@ -3666,15 +3670,16 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
|
||||
}
|
||||
|
||||
static int bpf_perf_link_fill_common(const struct perf_event *event,
|
||||
char __user *uname, u32 ulen,
|
||||
char __user *uname, u32 *ulenp,
|
||||
u64 *probe_offset, u64 *probe_addr,
|
||||
u32 *fd_type, unsigned long *missed)
|
||||
{
|
||||
const char *buf;
|
||||
u32 prog_id;
|
||||
u32 prog_id, ulen;
|
||||
size_t len;
|
||||
int err;
|
||||
|
||||
ulen = *ulenp;
|
||||
if (!ulen ^ !uname)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -3682,10 +3687,17 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
|
||||
probe_offset, probe_addr, missed);
|
||||
if (err)
|
||||
return err;
|
||||
if (!uname)
|
||||
return 0;
|
||||
|
||||
if (buf) {
|
||||
len = strlen(buf);
|
||||
*ulenp = len + 1;
|
||||
} else {
|
||||
*ulenp = 1;
|
||||
}
|
||||
if (!uname)
|
||||
return 0;
|
||||
|
||||
if (buf) {
|
||||
err = bpf_copy_to_user(uname, buf, ulen, len);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3710,7 +3722,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
|
||||
|
||||
uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
|
||||
ulen = info->perf_event.kprobe.name_len;
|
||||
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
|
||||
err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
|
||||
&type, &missed);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3718,7 +3730,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
|
||||
info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
|
||||
else
|
||||
info->perf_event.type = BPF_PERF_EVENT_KPROBE;
|
||||
|
||||
info->perf_event.kprobe.name_len = ulen;
|
||||
info->perf_event.kprobe.offset = offset;
|
||||
info->perf_event.kprobe.missed = missed;
|
||||
if (!kallsyms_show_value(current_cred()))
|
||||
@@ -3740,7 +3752,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
|
||||
|
||||
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
|
||||
ulen = info->perf_event.uprobe.name_len;
|
||||
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
|
||||
err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
|
||||
&type, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3749,6 +3761,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
|
||||
info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
|
||||
else
|
||||
info->perf_event.type = BPF_PERF_EVENT_UPROBE;
|
||||
info->perf_event.uprobe.name_len = ulen;
|
||||
info->perf_event.uprobe.offset = offset;
|
||||
info->perf_event.uprobe.cookie = event->bpf_cookie;
|
||||
return 0;
|
||||
@@ -3774,12 +3787,18 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
|
||||
{
|
||||
char __user *uname;
|
||||
u32 ulen;
|
||||
int err;
|
||||
|
||||
uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
|
||||
ulen = info->perf_event.tracepoint.name_len;
|
||||
err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
|
||||
info->perf_event.tracepoint.name_len = ulen;
|
||||
info->perf_event.tracepoint.cookie = event->bpf_cookie;
|
||||
return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
|
||||
@@ -5978,7 +5997,7 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
|
||||
.arg1_type = ARG_PTR_TO_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
|
||||
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
|
||||
.arg4_size = sizeof(u64),
|
||||
};
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *co
|
||||
rcu_read_lock();
|
||||
pid = find_pid_ns(common->pid, common->ns);
|
||||
if (pid) {
|
||||
task = get_pid_task(pid, PIDTYPE_TGID);
|
||||
task = get_pid_task(pid, PIDTYPE_PID);
|
||||
*tid = common->pid;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -2750,10 +2750,16 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
|
||||
b->module = mod;
|
||||
b->offset = offset;
|
||||
|
||||
/* sort() reorders entries by value, so b may no longer point
|
||||
* to the right entry after this
|
||||
*/
|
||||
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
|
||||
kfunc_btf_cmp_by_off, NULL);
|
||||
} else {
|
||||
btf = b->btf;
|
||||
}
|
||||
return b->btf;
|
||||
|
||||
return btf;
|
||||
}
|
||||
|
||||
void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
|
||||
@@ -6360,10 +6366,10 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
|
||||
|
||||
/* both of s64_max/s64_min positive or negative */
|
||||
if ((s64_max >= 0) == (s64_min >= 0)) {
|
||||
reg->smin_value = reg->s32_min_value = s64_min;
|
||||
reg->smax_value = reg->s32_max_value = s64_max;
|
||||
reg->umin_value = reg->u32_min_value = s64_min;
|
||||
reg->umax_value = reg->u32_max_value = s64_max;
|
||||
reg->s32_min_value = reg->smin_value = s64_min;
|
||||
reg->s32_max_value = reg->smax_value = s64_max;
|
||||
reg->u32_min_value = reg->umin_value = s64_min;
|
||||
reg->u32_max_value = reg->umax_value = s64_max;
|
||||
reg->var_off = tnum_range(s64_min, s64_max);
|
||||
return;
|
||||
}
|
||||
@@ -7459,7 +7465,8 @@ static int check_stack_range_initialized(
|
||||
}
|
||||
|
||||
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
int access_size, bool zero_size_allowed,
|
||||
int access_size, enum bpf_access_type access_type,
|
||||
bool zero_size_allowed,
|
||||
struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
|
||||
@@ -7471,7 +7478,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
return check_packet_access(env, regno, reg->off, access_size,
|
||||
zero_size_allowed);
|
||||
case PTR_TO_MAP_KEY:
|
||||
if (meta && meta->raw_mode) {
|
||||
if (access_type == BPF_WRITE) {
|
||||
verbose(env, "R%d cannot write into %s\n", regno,
|
||||
reg_type_str(env, reg->type));
|
||||
return -EACCES;
|
||||
@@ -7479,15 +7486,13 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
return check_mem_region_access(env, regno, reg->off, access_size,
|
||||
reg->map_ptr->key_size, false);
|
||||
case PTR_TO_MAP_VALUE:
|
||||
if (check_map_access_type(env, regno, reg->off, access_size,
|
||||
meta && meta->raw_mode ? BPF_WRITE :
|
||||
BPF_READ))
|
||||
if (check_map_access_type(env, regno, reg->off, access_size, access_type))
|
||||
return -EACCES;
|
||||
return check_map_access(env, regno, reg->off, access_size,
|
||||
zero_size_allowed, ACCESS_HELPER);
|
||||
case PTR_TO_MEM:
|
||||
if (type_is_rdonly_mem(reg->type)) {
|
||||
if (meta && meta->raw_mode) {
|
||||
if (access_type == BPF_WRITE) {
|
||||
verbose(env, "R%d cannot write into %s\n", regno,
|
||||
reg_type_str(env, reg->type));
|
||||
return -EACCES;
|
||||
@@ -7498,7 +7503,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
zero_size_allowed);
|
||||
case PTR_TO_BUF:
|
||||
if (type_is_rdonly_mem(reg->type)) {
|
||||
if (meta && meta->raw_mode) {
|
||||
if (access_type == BPF_WRITE) {
|
||||
verbose(env, "R%d cannot write into %s\n", regno,
|
||||
reg_type_str(env, reg->type));
|
||||
return -EACCES;
|
||||
@@ -7526,7 +7531,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
* Dynamically check it now.
|
||||
*/
|
||||
if (!env->ops->convert_ctx_access) {
|
||||
enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
|
||||
int offset = access_size - 1;
|
||||
|
||||
/* Allow zero-byte read from PTR_TO_CTX */
|
||||
@@ -7534,7 +7538,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
return zero_size_allowed ? 0 : -EACCES;
|
||||
|
||||
return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
|
||||
atype, -1, false, false);
|
||||
access_type, -1, false, false);
|
||||
}
|
||||
|
||||
fallthrough;
|
||||
@@ -7559,6 +7563,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
|
||||
*/
|
||||
static int check_mem_size_reg(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg, u32 regno,
|
||||
enum bpf_access_type access_type,
|
||||
bool zero_size_allowed,
|
||||
struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
@@ -7574,15 +7579,12 @@ static int check_mem_size_reg(struct bpf_verifier_env *env,
|
||||
*/
|
||||
meta->msize_max_value = reg->umax_value;
|
||||
|
||||
/* The register is SCALAR_VALUE; the access check
|
||||
* happens using its boundaries.
|
||||
/* The register is SCALAR_VALUE; the access check happens using
|
||||
* its boundaries. For unprivileged variable accesses, disable
|
||||
* raw mode so that the program is required to initialize all
|
||||
* the memory that the helper could just partially fill up.
|
||||
*/
|
||||
if (!tnum_is_const(reg->var_off))
|
||||
/* For unprivileged variable accesses, disable raw
|
||||
* mode so that the program is required to
|
||||
* initialize all the memory that the helper could
|
||||
* just partially fill up.
|
||||
*/
|
||||
meta = NULL;
|
||||
|
||||
if (reg->smin_value < 0) {
|
||||
@@ -7602,9 +7604,8 @@ static int check_mem_size_reg(struct bpf_verifier_env *env,
|
||||
regno);
|
||||
return -EACCES;
|
||||
}
|
||||
err = check_helper_mem_access(env, regno - 1,
|
||||
reg->umax_value,
|
||||
zero_size_allowed, meta);
|
||||
err = check_helper_mem_access(env, regno - 1, reg->umax_value,
|
||||
access_type, zero_size_allowed, meta);
|
||||
if (!err)
|
||||
err = mark_chain_precision(env, regno);
|
||||
return err;
|
||||
@@ -7615,13 +7616,11 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
|
||||
{
|
||||
bool may_be_null = type_may_be_null(reg->type);
|
||||
struct bpf_reg_state saved_reg;
|
||||
struct bpf_call_arg_meta meta;
|
||||
int err;
|
||||
|
||||
if (register_is_null(reg))
|
||||
return 0;
|
||||
|
||||
memset(&meta, 0, sizeof(meta));
|
||||
/* Assuming that the register contains a value check if the memory
|
||||
* access is safe. Temporarily save and restore the register's state as
|
||||
* the conversion shouldn't be visible to a caller.
|
||||
@@ -7631,10 +7630,8 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg
|
||||
mark_ptr_not_null_reg(reg);
|
||||
}
|
||||
|
||||
err = check_helper_mem_access(env, regno, mem_size, true, &meta);
|
||||
/* Check access for BPF_WRITE */
|
||||
meta.raw_mode = true;
|
||||
err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
|
||||
err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
|
||||
err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
|
||||
|
||||
if (may_be_null)
|
||||
*reg = saved_reg;
|
||||
@@ -7660,13 +7657,12 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg
|
||||
mark_ptr_not_null_reg(mem_reg);
|
||||
}
|
||||
|
||||
err = check_mem_size_reg(env, reg, regno, true, &meta);
|
||||
/* Check access for BPF_WRITE */
|
||||
meta.raw_mode = true;
|
||||
err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
|
||||
err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
|
||||
err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);
|
||||
|
||||
if (may_be_null)
|
||||
*mem_reg = saved_reg;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -8969,9 +8965,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
||||
verbose(env, "invalid map_ptr to access map->key\n");
|
||||
return -EACCES;
|
||||
}
|
||||
err = check_helper_mem_access(env, regno,
|
||||
meta->map_ptr->key_size, false,
|
||||
NULL);
|
||||
err = check_helper_mem_access(env, regno, meta->map_ptr->key_size,
|
||||
BPF_READ, false, NULL);
|
||||
break;
|
||||
case ARG_PTR_TO_MAP_VALUE:
|
||||
if (type_may_be_null(arg_type) && register_is_null(reg))
|
||||
@@ -8986,9 +8981,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
||||
return -EACCES;
|
||||
}
|
||||
meta->raw_mode = arg_type & MEM_UNINIT;
|
||||
err = check_helper_mem_access(env, regno,
|
||||
meta->map_ptr->value_size, false,
|
||||
meta);
|
||||
err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
|
||||
arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
|
||||
false, meta);
|
||||
break;
|
||||
case ARG_PTR_TO_PERCPU_BTF_ID:
|
||||
if (!reg->btf_id) {
|
||||
@@ -9030,7 +9025,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
||||
*/
|
||||
meta->raw_mode = arg_type & MEM_UNINIT;
|
||||
if (arg_type & MEM_FIXED_SIZE) {
|
||||
err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
|
||||
err = check_helper_mem_access(env, regno, fn->arg_size[arg],
|
||||
arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
|
||||
false, meta);
|
||||
if (err)
|
||||
return err;
|
||||
if (arg_type & MEM_ALIGNED)
|
||||
@@ -9038,10 +9035,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
|
||||
}
|
||||
break;
|
||||
case ARG_CONST_SIZE:
|
||||
err = check_mem_size_reg(env, reg, regno, false, meta);
|
||||
err = check_mem_size_reg(env, reg, regno,
|
||||
fn->arg_type[arg - 1] & MEM_WRITE ?
|
||||
BPF_WRITE : BPF_READ,
|
||||
false, meta);
|
||||
break;
|
||||
case ARG_CONST_SIZE_OR_ZERO:
|
||||
err = check_mem_size_reg(env, reg, regno, true, meta);
|
||||
err = check_mem_size_reg(env, reg, regno,
|
||||
fn->arg_type[arg - 1] & MEM_WRITE ?
|
||||
BPF_WRITE : BPF_READ,
|
||||
true, meta);
|
||||
break;
|
||||
case ARG_PTR_TO_DYNPTR:
|
||||
err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
|
||||
@@ -14296,12 +14299,13 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
|
||||
* r1 += 0x1
|
||||
* if r2 < 1000 goto ...
|
||||
* use r1 in memory access
|
||||
* So remember constant delta between r2 and r1 and update r1 after
|
||||
* 'if' condition.
|
||||
* So for 64-bit alu remember constant delta between r2 and r1 and
|
||||
* update r1 after 'if' condition.
|
||||
*/
|
||||
if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD &&
|
||||
dst_reg->id && is_reg_const(src_reg, alu32)) {
|
||||
u64 val = reg_const_value(src_reg, alu32);
|
||||
if (env->bpf_capable &&
|
||||
BPF_OP(insn->code) == BPF_ADD && !alu32 &&
|
||||
dst_reg->id && is_reg_const(src_reg, false)) {
|
||||
u64 val = reg_const_value(src_reg, false);
|
||||
|
||||
if ((dst_reg->id & BPF_ADD_CONST) ||
|
||||
/* prevent overflow in sync_linked_regs() later */
|
||||
@@ -15358,8 +15362,12 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
|
||||
continue;
|
||||
if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
|
||||
reg->off == known_reg->off) {
|
||||
s32 saved_subreg_def = reg->subreg_def;
|
||||
|
||||
copy_register_state(reg, known_reg);
|
||||
reg->subreg_def = saved_subreg_def;
|
||||
} else {
|
||||
s32 saved_subreg_def = reg->subreg_def;
|
||||
s32 saved_off = reg->off;
|
||||
|
||||
fake_reg.type = SCALAR_VALUE;
|
||||
@@ -15372,6 +15380,7 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
|
||||
* otherwise another sync_linked_regs() will be incorrect.
|
||||
*/
|
||||
reg->off = saved_off;
|
||||
reg->subreg_def = saved_subreg_def;
|
||||
|
||||
scalar32_min_max_add(reg, &fake_reg);
|
||||
scalar_min_max_add(reg, &fake_reg);
|
||||
@@ -21230,7 +21239,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
|
||||
delta += cnt - 1;
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
continue;
|
||||
goto next_insn;
|
||||
}
|
||||
|
||||
/* Implement bpf_kptr_xchg inline */
|
||||
@@ -22339,7 +22348,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
|
||||
/* 'struct bpf_verifier_env' can be global, but since it's not small,
|
||||
* allocate/free it every time bpf_check() is called
|
||||
*/
|
||||
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
|
||||
env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
|
||||
if (!env)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -22575,6 +22584,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
|
||||
mutex_unlock(&bpf_verifier_lock);
|
||||
vfree(env->insn_aux_data);
|
||||
err_free_env:
|
||||
kfree(env);
|
||||
kvfree(env);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -9251,7 +9251,7 @@ static void perf_event_switch(struct task_struct *task,
|
||||
},
|
||||
};
|
||||
|
||||
if (!sched_in && task->on_rq) {
|
||||
if (!sched_in && task_is_runnable(task)) {
|
||||
switch_event.event_id.header.misc |=
|
||||
PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
|
||||
}
|
||||
|
||||
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
|
||||
int no_files)
|
||||
{
|
||||
struct files_struct *oldf, *newf;
|
||||
int error = 0;
|
||||
|
||||
/*
|
||||
* A background process may not have any files ...
|
||||
*/
|
||||
oldf = current->files;
|
||||
if (!oldf)
|
||||
goto out;
|
||||
return 0;
|
||||
|
||||
if (no_files) {
|
||||
tsk->files = NULL;
|
||||
goto out;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (clone_flags & CLONE_FILES) {
|
||||
atomic_inc(&oldf->count);
|
||||
goto out;
|
||||
return 0;
|
||||
}
|
||||
|
||||
newf = dup_fd(oldf, NR_OPEN_MAX, &error);
|
||||
if (!newf)
|
||||
goto out;
|
||||
newf = dup_fd(oldf, NULL);
|
||||
if (IS_ERR(newf))
|
||||
return PTR_ERR(newf);
|
||||
|
||||
tsk->files = newf;
|
||||
error = 0;
|
||||
out:
|
||||
return error;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
|
||||
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
|
||||
/*
|
||||
* Unshare file descriptor table if it is being shared
|
||||
*/
|
||||
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
||||
struct files_struct **new_fdp)
|
||||
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
|
||||
{
|
||||
struct files_struct *fd = current->files;
|
||||
int error = 0;
|
||||
|
||||
if ((unshare_flags & CLONE_FILES) &&
|
||||
(fd && atomic_read(&fd->count) > 1)) {
|
||||
*new_fdp = dup_fd(fd, max_fds, &error);
|
||||
if (!*new_fdp)
|
||||
return error;
|
||||
fd = dup_fd(fd, NULL);
|
||||
if (IS_ERR(fd))
|
||||
return PTR_ERR(fd);
|
||||
*new_fdp = fd;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
|
||||
err = unshare_fs(unshare_flags, &new_fs);
|
||||
if (err)
|
||||
goto bad_unshare_out;
|
||||
err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
|
||||
err = unshare_fd(unshare_flags, &new_fd);
|
||||
if (err)
|
||||
goto bad_unshare_cleanup_fs;
|
||||
err = unshare_userns(unshare_flags, &new_cred);
|
||||
@@ -3398,7 +3394,7 @@ int unshare_files(void)
|
||||
struct files_struct *old, *copy = NULL;
|
||||
int error;
|
||||
|
||||
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
|
||||
error = unshare_fd(CLONE_FILES, &copy);
|
||||
if (error || !copy)
|
||||
return error;
|
||||
|
||||
|
||||
@@ -109,7 +109,12 @@ static int __set_task_frozen(struct task_struct *p, void *arg)
|
||||
{
|
||||
unsigned int state = READ_ONCE(p->__state);
|
||||
|
||||
if (p->on_rq)
|
||||
/*
|
||||
* Allow freezing the sched_delayed tasks; they will not execute until
|
||||
* ttwu() fixes them up, so it is safe to swap their state now, instead
|
||||
* of waiting for them to get fully dequeued.
|
||||
*/
|
||||
if (task_is_runnable(p))
|
||||
return 0;
|
||||
|
||||
if (p != current && task_curr(p))
|
||||
|
||||
@@ -623,6 +623,8 @@ void kthread_unpark(struct task_struct *k)
|
||||
{
|
||||
struct kthread *kthread = to_kthread(k);
|
||||
|
||||
if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
|
||||
return;
|
||||
/*
|
||||
* Newly created kthread was parked when the CPU was offline.
|
||||
* The binding was lost and we need to set it again.
|
||||
|
||||
@@ -985,6 +985,15 @@ static bool rcu_tasks_is_holdout(struct task_struct *t)
|
||||
if (!READ_ONCE(t->on_rq))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* t->on_rq && !t->se.sched_delayed *could* be considered sleeping but
|
||||
* since it is a spurious state (it will transition into the
|
||||
* traditional blocked state or get woken up without outside
|
||||
* dependencies), not considering it such should only affect timing.
|
||||
*
|
||||
* Be conservative for now and not include it.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Idle tasks (or idle injection) within the idle loop are RCU-tasks
|
||||
* quiescent states. But CPU boot code performed by the idle task
|
||||
|
||||
@@ -3607,11 +3607,12 @@ kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
|
||||
}
|
||||
|
||||
// One work is per one batch, so there are three
|
||||
// "free channels", the batch can handle. It can
|
||||
// be that the work is in the pending state when
|
||||
// channels have been detached following by each
|
||||
// other.
|
||||
// "free channels", the batch can handle. Break
|
||||
// the loop since it is done with this CPU thus
|
||||
// queuing an RCU work _always_ succeeds here.
|
||||
queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
|
||||
WARN_ON_ONCE(!queued);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -554,13 +554,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
|
||||
rcu_nocb_unlock(rdp);
|
||||
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
|
||||
TPS("WakeLazy"));
|
||||
} else if (!irqs_disabled_flags(flags)) {
|
||||
} else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
|
||||
/* ... if queue was empty ... */
|
||||
rcu_nocb_unlock(rdp);
|
||||
wake_nocb_gp(rdp, false);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
|
||||
TPS("WakeEmpty"));
|
||||
} else {
|
||||
/*
|
||||
* Don't do the wake-up upfront on fragile paths.
|
||||
* Also offline CPUs can't call swake_up_one_online() from
|
||||
* (soft-)IRQs. Rely on the final deferred wake-up from
|
||||
* rcutree_report_cpu_dead()
|
||||
*/
|
||||
rcu_nocb_unlock(rdp);
|
||||
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
|
||||
TPS("WakeEmptyIsDeferred"));
|
||||
|
||||
@@ -169,6 +169,8 @@ static void resource_test_intersection(struct kunit *test)
|
||||
#define RES_TEST_RAM3_SIZE SZ_1M
|
||||
#define RES_TEST_TOTAL_SIZE ((RES_TEST_WIN1_OFFSET + RES_TEST_WIN1_SIZE))
|
||||
|
||||
KUNIT_DEFINE_ACTION_WRAPPER(kfree_wrapper, kfree, const void *);
|
||||
|
||||
static void remove_free_resource(void *ctx)
|
||||
{
|
||||
struct resource *res = (struct resource *)ctx;
|
||||
@@ -177,6 +179,14 @@ static void remove_free_resource(void *ctx)
|
||||
kfree(res);
|
||||
}
|
||||
|
||||
static void resource_test_add_action_or_abort(
|
||||
struct kunit *test, void (*action)(void *), void *ctx)
|
||||
{
|
||||
KUNIT_ASSERT_EQ_MSG(test, 0,
|
||||
kunit_add_action_or_reset(test, action, ctx),
|
||||
"Fail to add action");
|
||||
}
|
||||
|
||||
static void resource_test_request_region(struct kunit *test, struct resource *parent,
|
||||
resource_size_t start, resource_size_t size,
|
||||
const char *name, unsigned long flags)
|
||||
@@ -185,7 +195,7 @@ static void resource_test_request_region(struct kunit *test, struct resource *pa
|
||||
|
||||
res = __request_region(parent, start, size, name, flags);
|
||||
KUNIT_ASSERT_NOT_NULL(test, res);
|
||||
kunit_add_action_or_reset(test, remove_free_resource, res);
|
||||
resource_test_add_action_or_abort(test, remove_free_resource, res);
|
||||
}
|
||||
|
||||
static void resource_test_insert_resource(struct kunit *test, struct resource *parent,
|
||||
@@ -202,11 +212,11 @@ static void resource_test_insert_resource(struct kunit *test, struct resource *p
|
||||
res->end = start + size - 1;
|
||||
res->flags = flags;
|
||||
if (insert_resource(parent, res)) {
|
||||
kfree(res);
|
||||
resource_test_add_action_or_abort(test, kfree_wrapper, res);
|
||||
KUNIT_FAIL_AND_ABORT(test, "Fail to insert resource %pR\n", res);
|
||||
}
|
||||
|
||||
kunit_add_action_or_reset(test, remove_free_resource, res);
|
||||
resource_test_add_action_or_abort(test, remove_free_resource, res);
|
||||
}
|
||||
|
||||
static void resource_test_region_intersects(struct kunit *test)
|
||||
@@ -220,7 +230,7 @@ static void resource_test_region_intersects(struct kunit *test)
|
||||
"test resources");
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, parent);
|
||||
start = parent->start;
|
||||
kunit_add_action_or_reset(test, remove_free_resource, parent);
|
||||
resource_test_add_action_or_abort(test, remove_free_resource, parent);
|
||||
|
||||
resource_test_request_region(test, parent, start + RES_TEST_RAM0_OFFSET,
|
||||
RES_TEST_RAM0_SIZE, "Test System RAM 0", flags);
|
||||
|
||||
@@ -548,6 +548,11 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
|
||||
* ON_RQ_MIGRATING state is used for migration without holding both
|
||||
* rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
|
||||
*
|
||||
* Additionally it is possible to be ->on_rq but still be considered not
|
||||
* runnable when p->se.sched_delayed is true. These tasks are on the runqueue
|
||||
* but will be dequeued as soon as they get picked again. See the
|
||||
* task_is_runnable() helper.
|
||||
*
|
||||
* p->on_cpu <- { 0, 1 }:
|
||||
*
|
||||
* is set by prepare_task() and cleared by finish_task() such that it will be
|
||||
@@ -2012,11 +2017,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (!(flags & ENQUEUE_NOCLOCK))
|
||||
update_rq_clock(rq);
|
||||
|
||||
if (!(flags & ENQUEUE_RESTORE)) {
|
||||
sched_info_enqueue(rq, p);
|
||||
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
|
||||
}
|
||||
|
||||
p->sched_class->enqueue_task(rq, p, flags);
|
||||
/*
|
||||
* Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
|
||||
@@ -2024,6 +2024,11 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
*/
|
||||
uclamp_rq_inc(rq, p);
|
||||
|
||||
if (!(flags & ENQUEUE_RESTORE)) {
|
||||
sched_info_enqueue(rq, p);
|
||||
psi_enqueue(p, flags & ENQUEUE_MIGRATED);
|
||||
}
|
||||
|
||||
if (sched_core_enabled(rq))
|
||||
sched_core_enqueue(rq, p);
|
||||
}
|
||||
@@ -2041,7 +2046,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
|
||||
if (!(flags & DEQUEUE_SAVE)) {
|
||||
sched_info_dequeue(rq, p);
|
||||
psi_dequeue(p, flags & DEQUEUE_SLEEP);
|
||||
psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3518,14 +3523,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
|
||||
int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
|
||||
{
|
||||
lockdep_assert_held(&p->pi_lock);
|
||||
|
||||
if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
|
||||
else
|
||||
if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
|
||||
*wake_flags |= WF_RQ_SELECTED;
|
||||
} else {
|
||||
cpu = cpumask_any(p->cpus_ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
@@ -3659,6 +3666,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
|
||||
rq->nr_uninterruptible--;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
if (wake_flags & WF_RQ_SELECTED)
|
||||
en_flags |= ENQUEUE_RQ_SELECTED;
|
||||
if (wake_flags & WF_MIGRATED)
|
||||
en_flags |= ENQUEUE_MIGRATED;
|
||||
else
|
||||
@@ -4120,6 +4129,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
guard(preempt)();
|
||||
int cpu, success = 0;
|
||||
|
||||
wake_flags |= WF_TTWU;
|
||||
|
||||
if (p == current) {
|
||||
/*
|
||||
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
|
||||
@@ -4252,7 +4263,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
*/
|
||||
smp_cond_load_acquire(&p->on_cpu, !VAL);
|
||||
|
||||
cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
|
||||
cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
|
||||
if (task_cpu(p) != cpu) {
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
@@ -4317,9 +4328,10 @@ static bool __task_needs_rq_lock(struct task_struct *p)
|
||||
* @arg: Argument to function.
|
||||
*
|
||||
* Fix the task in its current state by avoiding wakeups and/or rq operations
|
||||
* and call @func(@arg) on it. This function can use ->on_rq and task_curr()
|
||||
* to work out what the state is, if required. Given that @func can be invoked
|
||||
* with a runqueue lock held, it had better be quite lightweight.
|
||||
* and call @func(@arg) on it. This function can use task_is_runnable() and
|
||||
* task_curr() to work out what the state is, if required. Given that @func
|
||||
* can be invoked with a runqueue lock held, it had better be quite
|
||||
* lightweight.
|
||||
*
|
||||
* Returns:
|
||||
* Whatever @func returns
|
||||
@@ -4793,6 +4805,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
{
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
int wake_flags = WF_FORK;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
|
||||
WRITE_ONCE(p->__state, TASK_RUNNING);
|
||||
@@ -4807,7 +4820,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
*/
|
||||
p->recent_used_cpu = task_cpu(p);
|
||||
rseq_migrate(p);
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
|
||||
#endif
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
update_rq_clock(rq);
|
||||
@@ -4815,7 +4828,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
|
||||
trace_sched_wakeup_new(p);
|
||||
wakeup_preempt(rq, p, WF_FORK);
|
||||
wakeup_preempt(rq, p, wake_flags);
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->sched_class->task_woken) {
|
||||
/*
|
||||
@@ -6537,6 +6550,7 @@ static void __sched notrace __schedule(int sched_mode)
|
||||
* as a preemption by schedule_debug() and RCU.
|
||||
*/
|
||||
bool preempt = sched_mode > SM_NONE;
|
||||
bool block = false;
|
||||
unsigned long *switch_count;
|
||||
unsigned long prev_state;
|
||||
struct rq_flags rf;
|
||||
@@ -6622,6 +6636,7 @@ static void __sched notrace __schedule(int sched_mode)
|
||||
* After this, schedule() must not care about p->state any more.
|
||||
*/
|
||||
block_task(rq, prev, flags);
|
||||
block = true;
|
||||
}
|
||||
switch_count = &prev->nvcsw;
|
||||
}
|
||||
@@ -6667,7 +6682,7 @@ static void __sched notrace __schedule(int sched_mode)
|
||||
|
||||
migrate_disable_switch(rq, prev);
|
||||
psi_account_irqtime(rq, prev, next);
|
||||
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
|
||||
psi_sched_switch(prev, next, block);
|
||||
|
||||
trace_sched_switch(preempt, prev, next, prev_state);
|
||||
|
||||
@@ -7010,20 +7025,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
|
||||
}
|
||||
EXPORT_SYMBOL(default_wake_function);
|
||||
|
||||
void __setscheduler_prio(struct task_struct *p, int prio)
|
||||
const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
|
||||
{
|
||||
if (dl_prio(prio))
|
||||
p->sched_class = &dl_sched_class;
|
||||
else if (rt_prio(prio))
|
||||
p->sched_class = &rt_sched_class;
|
||||
#ifdef CONFIG_SCHED_CLASS_EXT
|
||||
else if (task_should_scx(p))
|
||||
p->sched_class = &ext_sched_class;
|
||||
#endif
|
||||
else
|
||||
p->sched_class = &fair_sched_class;
|
||||
return &dl_sched_class;
|
||||
|
||||
p->prio = prio;
|
||||
if (rt_prio(prio))
|
||||
return &rt_sched_class;
|
||||
|
||||
#ifdef CONFIG_SCHED_CLASS_EXT
|
||||
if (task_should_scx(p))
|
||||
return &ext_sched_class;
|
||||
#endif
|
||||
|
||||
return &fair_sched_class;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
@@ -7069,7 +7084,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
||||
{
|
||||
int prio, oldprio, queued, running, queue_flag =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
const struct sched_class *prev_class;
|
||||
const struct sched_class *prev_class, *next_class;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
@@ -7127,6 +7142,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
||||
queue_flag &= ~DEQUEUE_MOVE;
|
||||
|
||||
prev_class = p->sched_class;
|
||||
next_class = __setscheduler_class(p, prio);
|
||||
|
||||
if (prev_class != next_class && p->se.sched_delayed)
|
||||
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
@@ -7164,7 +7184,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
||||
p->rt.timeout = 0;
|
||||
}
|
||||
|
||||
__setscheduler_prio(p, prio);
|
||||
p->sched_class = next_class;
|
||||
p->prio = prio;
|
||||
|
||||
check_class_changing(rq, p, prev_class);
|
||||
|
||||
if (queued)
|
||||
@@ -10458,7 +10480,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
|
||||
return;
|
||||
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
|
||||
return;
|
||||
task_work_add(curr, work, TWA_RESUME);
|
||||
|
||||
/* No page allocation under rq lock */
|
||||
task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
|
||||
}
|
||||
|
||||
void sched_mm_cid_exit_signals(struct task_struct *t)
|
||||
|
||||
@@ -2385,7 +2385,7 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
|
||||
|
||||
deadline_queue_push_tasks(rq);
|
||||
|
||||
if (hrtick_enabled(rq))
|
||||
if (hrtick_enabled_dl(rq))
|
||||
start_hrtick_dl(rq, &p->dl);
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
|
||||
|
||||
enum scx_consts {
|
||||
SCX_SLICE_BYPASS = SCX_SLICE_DFL / 4,
|
||||
SCX_DSP_DFL_MAX_BATCH = 32,
|
||||
SCX_DSP_MAX_LOOPS = 32,
|
||||
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
|
||||
@@ -19,6 +18,12 @@ enum scx_consts {
|
||||
SCX_EXIT_DUMP_DFL_LEN = 32768,
|
||||
|
||||
SCX_CPUPERF_ONE = SCHED_CAPACITY_SCALE,
|
||||
|
||||
/*
|
||||
* Iterating all tasks may take a while. Periodically drop
|
||||
* scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
|
||||
*/
|
||||
SCX_OPS_TASK_ITER_BATCH = 32,
|
||||
};
|
||||
|
||||
enum scx_exit_kind {
|
||||
@@ -625,6 +630,10 @@ struct sched_ext_ops {
|
||||
/**
|
||||
* exit - Clean up after the BPF scheduler
|
||||
* @info: Exit info
|
||||
*
|
||||
* ops.exit() is also called on ops.init() failure, which is a bit
|
||||
* unusual. This is to allow rich reporting through @info on how
|
||||
* ops.init() failed.
|
||||
*/
|
||||
void (*exit)(struct scx_exit_info *info);
|
||||
|
||||
@@ -692,6 +701,7 @@ enum scx_enq_flags {
|
||||
/* expose select ENQUEUE_* flags as enums */
|
||||
SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
|
||||
SCX_ENQ_HEAD = ENQUEUE_HEAD,
|
||||
SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
|
||||
|
||||
/* high 32bits are SCX specific */
|
||||
|
||||
@@ -1269,86 +1279,104 @@ struct scx_task_iter {
|
||||
struct task_struct *locked;
|
||||
struct rq *rq;
|
||||
struct rq_flags rf;
|
||||
u32 cnt;
|
||||
};
|
||||
|
||||
/**
|
||||
* scx_task_iter_init - Initialize a task iterator
|
||||
* scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
|
||||
* @iter: iterator to init
|
||||
*
|
||||
* Initialize @iter. Must be called with scx_tasks_lock held. Once initialized,
|
||||
* @iter must eventually be exited with scx_task_iter_exit().
|
||||
* Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
|
||||
* must eventually be stopped with scx_task_iter_stop().
|
||||
*
|
||||
* scx_tasks_lock may be released between this and the first next() call or
|
||||
* between any two next() calls. If scx_tasks_lock is released between two
|
||||
* next() calls, the caller is responsible for ensuring that the task being
|
||||
* iterated remains accessible either through RCU read lock or obtaining a
|
||||
* reference count.
|
||||
* scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
|
||||
* between this and the first next() call or between any two next() calls. If
|
||||
* the locks are released between two next() calls, the caller is responsible
|
||||
* for ensuring that the task being iterated remains accessible either through
|
||||
* RCU read lock or obtaining a reference count.
|
||||
*
|
||||
* All tasks which existed when the iteration started are guaranteed to be
|
||||
* visited as long as they still exist.
|
||||
*/
|
||||
static void scx_task_iter_init(struct scx_task_iter *iter)
|
||||
static void scx_task_iter_start(struct scx_task_iter *iter)
|
||||
{
|
||||
lockdep_assert_held(&scx_tasks_lock);
|
||||
|
||||
BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
|
||||
((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
|
||||
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
|
||||
iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
|
||||
list_add(&iter->cursor.tasks_node, &scx_tasks);
|
||||
iter->locked = NULL;
|
||||
iter->cnt = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_task_iter_rq_unlock - Unlock rq locked by a task iterator
|
||||
* @iter: iterator to unlock rq for
|
||||
*
|
||||
* If @iter is in the middle of a locked iteration, it may be locking the rq of
|
||||
* the task currently being visited. Unlock the rq if so. This function can be
|
||||
* safely called anytime during an iteration.
|
||||
*
|
||||
* Returns %true if the rq @iter was locking is unlocked. %false if @iter was
|
||||
* not locking an rq.
|
||||
*/
|
||||
static bool scx_task_iter_rq_unlock(struct scx_task_iter *iter)
|
||||
static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
|
||||
{
|
||||
if (iter->locked) {
|
||||
task_rq_unlock(iter->rq, iter->locked, &iter->rf);
|
||||
iter->locked = NULL;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_task_iter_exit - Exit a task iterator
|
||||
* scx_task_iter_unlock - Unlock rq and scx_tasks_lock held by a task iterator
|
||||
* @iter: iterator to unlock
|
||||
*
|
||||
* If @iter is in the middle of a locked iteration, it may be locking the rq of
|
||||
* the task currently being visited in addition to scx_tasks_lock. Unlock both.
|
||||
* This function can be safely called anytime during an iteration.
|
||||
*/
|
||||
static void scx_task_iter_unlock(struct scx_task_iter *iter)
|
||||
{
|
||||
__scx_task_iter_rq_unlock(iter);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_task_iter_relock - Lock scx_tasks_lock released by scx_task_iter_unlock()
|
||||
* @iter: iterator to re-lock
|
||||
*
|
||||
* Re-lock scx_tasks_lock unlocked by scx_task_iter_unlock(). Note that it
|
||||
* doesn't re-lock the rq lock. Must be called before other iterator operations.
|
||||
*/
|
||||
static void scx_task_iter_relock(struct scx_task_iter *iter)
|
||||
{
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_task_iter_stop - Stop a task iteration and unlock scx_tasks_lock
|
||||
* @iter: iterator to exit
|
||||
*
|
||||
* Exit a previously initialized @iter. Must be called with scx_tasks_lock held.
|
||||
* If the iterator holds a task's rq lock, that rq lock is released. See
|
||||
* scx_task_iter_init() for details.
|
||||
* Exit a previously initialized @iter. Must be called with scx_tasks_lock held
|
||||
* which is released on return. If the iterator holds a task's rq lock, that rq
|
||||
* lock is also released. See scx_task_iter_start() for details.
|
||||
*/
|
||||
static void scx_task_iter_exit(struct scx_task_iter *iter)
|
||||
static void scx_task_iter_stop(struct scx_task_iter *iter)
|
||||
{
|
||||
lockdep_assert_held(&scx_tasks_lock);
|
||||
|
||||
scx_task_iter_rq_unlock(iter);
|
||||
list_del_init(&iter->cursor.tasks_node);
|
||||
scx_task_iter_unlock(iter);
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_task_iter_next - Next task
|
||||
* @iter: iterator to walk
|
||||
*
|
||||
* Visit the next task. See scx_task_iter_init() for details.
|
||||
* Visit the next task. See scx_task_iter_start() for details. Locks are dropped
|
||||
* and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
|
||||
* stalls by holding scx_tasks_lock for too long.
|
||||
*/
|
||||
static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
|
||||
{
|
||||
struct list_head *cursor = &iter->cursor.tasks_node;
|
||||
struct sched_ext_entity *pos;
|
||||
|
||||
lockdep_assert_held(&scx_tasks_lock);
|
||||
if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
|
||||
scx_task_iter_unlock(iter);
|
||||
cond_resched();
|
||||
scx_task_iter_relock(iter);
|
||||
}
|
||||
|
||||
list_for_each_entry(pos, cursor, tasks_node) {
|
||||
if (&pos->tasks_node == &scx_tasks)
|
||||
@@ -1369,14 +1397,14 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
|
||||
* @include_dead: Whether we should include dead tasks in the iteration
|
||||
*
|
||||
* Visit the non-idle task with its rq lock held. Allows callers to specify
|
||||
* whether they would like to filter out dead tasks. See scx_task_iter_init()
|
||||
* whether they would like to filter out dead tasks. See scx_task_iter_start()
|
||||
* for details.
|
||||
*/
|
||||
static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
scx_task_iter_rq_unlock(iter);
|
||||
__scx_task_iter_rq_unlock(iter);
|
||||
|
||||
while ((p = scx_task_iter_next(iter))) {
|
||||
/*
|
||||
@@ -1944,7 +1972,6 @@ static bool scx_rq_online(struct rq *rq)
|
||||
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
||||
int sticky_cpu)
|
||||
{
|
||||
bool bypassing = scx_rq_bypassing(rq);
|
||||
struct task_struct **ddsp_taskp;
|
||||
unsigned long qseq;
|
||||
|
||||
@@ -1962,7 +1989,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
||||
if (!scx_rq_online(rq))
|
||||
goto local;
|
||||
|
||||
if (bypassing)
|
||||
if (scx_rq_bypassing(rq))
|
||||
goto global;
|
||||
|
||||
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
|
||||
@@ -2017,7 +2044,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
||||
|
||||
global:
|
||||
touch_core_sched(rq, p); /* see the comment in local: */
|
||||
p->scx.slice = bypassing ? SCX_SLICE_BYPASS : SCX_SLICE_DFL;
|
||||
p->scx.slice = SCX_SLICE_DFL;
|
||||
dispatch_enqueue(find_global_dsq(p), p, enq_flags);
|
||||
}
|
||||
|
||||
@@ -2953,8 +2980,8 @@ static struct task_struct *pick_task_scx(struct rq *rq)
|
||||
|
||||
if (unlikely(!p->scx.slice)) {
|
||||
if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
|
||||
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
|
||||
p->comm, p->pid);
|
||||
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
|
||||
p->comm, p->pid, __func__);
|
||||
scx_warned_zero_slice = true;
|
||||
}
|
||||
p->scx.slice = SCX_SLICE_DFL;
|
||||
@@ -3059,11 +3086,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
|
||||
|
||||
*found = false;
|
||||
|
||||
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
|
||||
scx_ops_error("built-in idle tracking is disabled");
|
||||
return prev_cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* If WAKE_SYNC, the waker's local DSQ is empty, and the system is
|
||||
* under utilized, wake up @p to the local DSQ of the waker. Checking
|
||||
@@ -3128,7 +3150,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
|
||||
if (unlikely(wake_flags & WF_EXEC))
|
||||
return prev_cpu;
|
||||
|
||||
if (SCX_HAS_OP(select_cpu)) {
|
||||
if (SCX_HAS_OP(select_cpu) && !scx_rq_bypassing(task_rq(p))) {
|
||||
s32 cpu;
|
||||
struct task_struct **ddsp_taskp;
|
||||
|
||||
@@ -3193,7 +3215,7 @@ void __scx_update_idle(struct rq *rq, bool idle)
|
||||
{
|
||||
int cpu = cpu_of(rq);
|
||||
|
||||
if (SCX_HAS_OP(update_idle)) {
|
||||
if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
|
||||
SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
|
||||
if (!static_branch_unlikely(&scx_builtin_idle_enabled))
|
||||
return;
|
||||
@@ -4048,7 +4070,6 @@ static void scx_cgroup_exit(void)
|
||||
|
||||
percpu_rwsem_assert_held(&scx_cgroup_rwsem);
|
||||
|
||||
WARN_ON_ONCE(!scx_cgroup_enabled);
|
||||
scx_cgroup_enabled = false;
|
||||
|
||||
/*
|
||||
@@ -4117,6 +4138,7 @@ static int scx_cgroup_init(void)
|
||||
css->cgroup, &args);
|
||||
if (ret) {
|
||||
css_put(css);
|
||||
scx_ops_error("ops.cgroup_init() failed (%d)", ret);
|
||||
return ret;
|
||||
}
|
||||
tg->scx_flags |= SCX_TG_INITED;
|
||||
@@ -4256,21 +4278,23 @@ bool task_should_scx(struct task_struct *p)
|
||||
* the DISABLING state and then cycling the queued tasks through dequeue/enqueue
|
||||
* to force global FIFO scheduling.
|
||||
*
|
||||
* a. ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
|
||||
* %SCX_OPS_ENQ_LAST is also ignored.
|
||||
* - ops.select_cpu() is ignored and the default select_cpu() is used.
|
||||
*
|
||||
* b. ops.dispatch() is ignored.
|
||||
* - ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
|
||||
* %SCX_OPS_ENQ_LAST is also ignored.
|
||||
*
|
||||
* c. balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
|
||||
* can't be trusted. Whenever a tick triggers, the running task is rotated to
|
||||
* the tail of the queue with core_sched_at touched.
|
||||
* - ops.dispatch() is ignored.
|
||||
*
|
||||
* d. pick_next_task() suppresses zero slice warning.
|
||||
* - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
|
||||
* can't be trusted. Whenever a tick triggers, the running task is rotated to
|
||||
* the tail of the queue with core_sched_at touched.
|
||||
*
|
||||
* e. scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
|
||||
* operations.
|
||||
* - pick_next_task() suppresses zero slice warning.
|
||||
*
|
||||
* f. scx_prio_less() reverts to the default core_sched_at order.
|
||||
* - scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
|
||||
* operations.
|
||||
*
|
||||
* - scx_prio_less() reverts to the default core_sched_at order.
|
||||
*/
|
||||
static void scx_ops_bypass(bool bypass)
|
||||
{
|
||||
@@ -4340,7 +4364,7 @@ static void scx_ops_bypass(bool bypass)
|
||||
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
/* kick to restore ticks */
|
||||
/* resched to restore ticks and idle state */
|
||||
resched_cpu(cpu);
|
||||
}
|
||||
}
|
||||
@@ -4462,16 +4486,14 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
|
||||
|
||||
scx_ops_init_task_enabled = false;
|
||||
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_init(&sti);
|
||||
scx_task_iter_start(&sti);
|
||||
while ((p = scx_task_iter_next_locked(&sti))) {
|
||||
const struct sched_class *old_class = p->sched_class;
|
||||
struct sched_enq_and_set_ctx ctx;
|
||||
|
||||
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
|
||||
|
||||
p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
|
||||
__setscheduler_prio(p, p->prio);
|
||||
p->sched_class = __setscheduler_class(p, p->prio);
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
|
||||
sched_enq_and_set_task(&ctx);
|
||||
@@ -4479,8 +4501,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
|
||||
check_class_changed(task_rq(p), p, old_class, p->prio);
|
||||
scx_ops_exit_task(p);
|
||||
}
|
||||
scx_task_iter_exit(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_stop(&sti);
|
||||
percpu_up_write(&scx_fork_rwsem);
|
||||
|
||||
/* no task is on scx, turn off all the switches and flush in-progress calls */
|
||||
@@ -5041,6 +5062,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
if (ret) {
|
||||
ret = ops_sanitize_err("init", ret);
|
||||
cpus_read_unlock();
|
||||
scx_ops_error("ops.init() failed (%d)", ret);
|
||||
goto err_disable;
|
||||
}
|
||||
}
|
||||
@@ -5130,8 +5152,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
if (ret)
|
||||
goto err_disable_unlock_all;
|
||||
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_init(&sti);
|
||||
scx_task_iter_start(&sti);
|
||||
while ((p = scx_task_iter_next_locked(&sti))) {
|
||||
/*
|
||||
* @p may already be dead, have lost all its usages counts and
|
||||
@@ -5141,27 +5162,24 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
if (!tryget_task_struct(p))
|
||||
continue;
|
||||
|
||||
scx_task_iter_rq_unlock(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_unlock(&sti);
|
||||
|
||||
ret = scx_ops_init_task(p, task_group(p), false);
|
||||
if (ret) {
|
||||
put_task_struct(p);
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_exit(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
|
||||
ret, p->comm, p->pid);
|
||||
scx_task_iter_relock(&sti);
|
||||
scx_task_iter_stop(&sti);
|
||||
scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
|
||||
ret, p->comm, p->pid);
|
||||
goto err_disable_unlock_all;
|
||||
}
|
||||
|
||||
scx_set_task_state(p, SCX_TASK_READY);
|
||||
|
||||
put_task_struct(p);
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_relock(&sti);
|
||||
}
|
||||
scx_task_iter_exit(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_stop(&sti);
|
||||
scx_cgroup_unlock();
|
||||
percpu_up_write(&scx_fork_rwsem);
|
||||
|
||||
@@ -5178,35 +5196,28 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
* scx_tasks_lock.
|
||||
*/
|
||||
percpu_down_write(&scx_fork_rwsem);
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_init(&sti);
|
||||
scx_task_iter_start(&sti);
|
||||
while ((p = scx_task_iter_next_locked(&sti))) {
|
||||
const struct sched_class *old_class = p->sched_class;
|
||||
struct sched_enq_and_set_ctx ctx;
|
||||
|
||||
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
|
||||
|
||||
__setscheduler_prio(p, p->prio);
|
||||
p->scx.slice = SCX_SLICE_DFL;
|
||||
p->sched_class = __setscheduler_class(p, p->prio);
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
|
||||
sched_enq_and_set_task(&ctx);
|
||||
|
||||
check_class_changed(task_rq(p), p, old_class, p->prio);
|
||||
}
|
||||
scx_task_iter_exit(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_stop(&sti);
|
||||
percpu_up_write(&scx_fork_rwsem);
|
||||
|
||||
scx_ops_bypass(false);
|
||||
|
||||
/*
|
||||
* Returning an error code here would lose the recorded error
|
||||
* information. Exit indicating success so that the error is notified
|
||||
* through ops.exit() with all the details.
|
||||
*/
|
||||
if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
|
||||
WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
|
||||
ret = 0;
|
||||
goto err_disable;
|
||||
}
|
||||
|
||||
@@ -5241,10 +5252,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
scx_ops_bypass(false);
|
||||
err_disable:
|
||||
mutex_unlock(&scx_ops_enable_mutex);
|
||||
/* must be fully disabled before returning */
|
||||
scx_ops_disable(SCX_EXIT_ERROR);
|
||||
/*
|
||||
* Returning an error code here would not pass all the error information
|
||||
* to userspace. Record errno using scx_ops_error() for cases where
|
||||
* scx_ops_error() wasn't already invoked and exit indicating success so
|
||||
* that the error is notified through ops.exit() with all the details.
|
||||
*
|
||||
* Flush scx_ops_disable_work to ensure that error is reported before
|
||||
* init completion.
|
||||
*/
|
||||
scx_ops_error("scx_ops_enable() failed (%d)", ret);
|
||||
kthread_flush_work(&scx_ops_disable_work);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -5864,16 +5883,21 @@ __bpf_kfunc_start_defs();
|
||||
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
|
||||
u64 wake_flags, bool *is_idle)
|
||||
{
|
||||
if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) {
|
||||
*is_idle = false;
|
||||
return prev_cpu;
|
||||
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
|
||||
scx_ops_error("built-in idle tracking is disabled");
|
||||
goto prev_cpu;
|
||||
}
|
||||
|
||||
if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
|
||||
goto prev_cpu;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
|
||||
#else
|
||||
#endif
|
||||
|
||||
prev_cpu:
|
||||
*is_idle = false;
|
||||
return prev_cpu;
|
||||
#endif
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
@@ -1247,7 +1247,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
|
||||
account_cfs_rq_runtime(cfs_rq, delta_exec);
|
||||
|
||||
if (rq->nr_running == 1)
|
||||
if (cfs_rq->nr_running == 1)
|
||||
return;
|
||||
|
||||
if (resched || did_preempt_short(cfs_rq, curr)) {
|
||||
@@ -6058,10 +6058,13 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
|
||||
|
||||
if (se->on_rq) {
|
||||
SCHED_WARN_ON(se->sched_delayed);
|
||||
/* Handle any unfinished DELAY_DEQUEUE business first. */
|
||||
if (se->sched_delayed) {
|
||||
int flags = DEQUEUE_SLEEP | DEQUEUE_DELAYED;
|
||||
|
||||
dequeue_entity(qcfs_rq, se, flags);
|
||||
} else if (se->on_rq)
|
||||
break;
|
||||
}
|
||||
enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
|
||||
|
||||
if (cfs_rq_is_idle(group_cfs_rq(se)))
|
||||
@@ -13174,22 +13177,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
|
||||
static void switched_from_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
detach_task_cfs_rq(p);
|
||||
/*
|
||||
* Since this is called after changing class, this is a little weird
|
||||
* and we cannot use DEQUEUE_DELAYED.
|
||||
*/
|
||||
if (p->se.sched_delayed) {
|
||||
/* First, dequeue it from its new class' structures */
|
||||
dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
|
||||
/*
|
||||
* Now, clean up the fair_sched_class side of things
|
||||
* related to sched_delayed being true and that wasn't done
|
||||
* due to the generic dequeue not using DEQUEUE_DELAYED.
|
||||
*/
|
||||
finish_delayed_dequeue_entity(&p->se);
|
||||
p->se.rel_deadline = 0;
|
||||
__block_task(rq, p);
|
||||
}
|
||||
}
|
||||
|
||||
static void switched_to_fair(struct rq *rq, struct task_struct *p)
|
||||
|
||||
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
|
||||
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
|
||||
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
|
||||
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
|
||||
#define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static_assert(WF_EXEC == SD_BALANCE_EXEC);
|
||||
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
|
||||
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
|
||||
* ENQUEUE_MIGRATED - the task was migrated during wakeup
|
||||
* ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
#define ENQUEUE_INITIAL 0x80
|
||||
#define ENQUEUE_MIGRATING 0x100
|
||||
#define ENQUEUE_DELAYED 0x200
|
||||
#define ENQUEUE_RQ_SELECTED 0x400
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
@@ -3797,7 +3800,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
|
||||
|
||||
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
|
||||
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
|
||||
extern void __setscheduler_prio(struct task_struct *p, int prio);
|
||||
extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
|
||||
extern void set_load_weight(struct task_struct *p, bool update_load);
|
||||
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
|
||||
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
@@ -119,45 +119,63 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
|
||||
/*
|
||||
* PSI tracks state that persists across sleeps, such as iowaits and
|
||||
* memory stalls. As a result, it has to distinguish between sleeps,
|
||||
* where a task's runnable state changes, and requeues, where a task
|
||||
* and its state are being moved between CPUs and runqueues.
|
||||
* where a task's runnable state changes, and migrations, where a task
|
||||
* and its runnable state are being moved between CPUs and runqueues.
|
||||
*
|
||||
* A notable case is a task whose dequeue is delayed. PSI considers
|
||||
* those sleeping, but because they are still on the runqueue they can
|
||||
* go through migration requeues. In this case, *sleeping* states need
|
||||
* to be transferred.
|
||||
*/
|
||||
static inline void psi_enqueue(struct task_struct *p, bool wakeup)
|
||||
static inline void psi_enqueue(struct task_struct *p, bool migrate)
|
||||
{
|
||||
int clear = 0, set = TSK_RUNNING;
|
||||
int clear = 0, set = 0;
|
||||
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
return;
|
||||
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL_RUNNING;
|
||||
|
||||
if (!wakeup) {
|
||||
if (p->se.sched_delayed) {
|
||||
/* CPU migration of "sleeping" task */
|
||||
SCHED_WARN_ON(!migrate);
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL;
|
||||
if (p->in_iowait)
|
||||
set |= TSK_IOWAIT;
|
||||
} else if (migrate) {
|
||||
/* CPU migration of runnable task */
|
||||
set = TSK_RUNNING;
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL | TSK_MEMSTALL_RUNNING;
|
||||
} else {
|
||||
/* Wakeup of new or sleeping task */
|
||||
if (p->in_iowait)
|
||||
clear |= TSK_IOWAIT;
|
||||
set = TSK_RUNNING;
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL_RUNNING;
|
||||
}
|
||||
|
||||
psi_task_change(p, clear, set);
|
||||
}
|
||||
|
||||
static inline void psi_dequeue(struct task_struct *p, bool sleep)
|
||||
static inline void psi_dequeue(struct task_struct *p, bool migrate)
|
||||
{
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
return;
|
||||
|
||||
/*
|
||||
* When migrating a task to another CPU, clear all psi
|
||||
* state. The enqueue callback above will work it out.
|
||||
*/
|
||||
if (migrate)
|
||||
psi_task_change(p, p->psi_flags, 0);
|
||||
|
||||
/*
|
||||
* A voluntary sleep is a dequeue followed by a task switch. To
|
||||
* avoid walking all ancestors twice, psi_task_switch() handles
|
||||
* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
|
||||
* Do nothing here.
|
||||
*/
|
||||
if (sleep)
|
||||
return;
|
||||
|
||||
psi_task_change(p, p->psi_flags, 0);
|
||||
}
|
||||
|
||||
static inline void psi_ttwu_dequeue(struct task_struct *p)
|
||||
@@ -190,8 +208,8 @@ static inline void psi_sched_switch(struct task_struct *prev,
|
||||
}
|
||||
|
||||
#else /* CONFIG_PSI */
|
||||
static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
|
||||
static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
|
||||
static inline void psi_enqueue(struct task_struct *p, bool migrate) {}
|
||||
static inline void psi_dequeue(struct task_struct *p, bool migrate) {}
|
||||
static inline void psi_ttwu_dequeue(struct task_struct *p) {}
|
||||
static inline void psi_sched_switch(struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
|
||||
@@ -529,7 +529,7 @@ int __sched_setscheduler(struct task_struct *p,
|
||||
{
|
||||
int oldpolicy = -1, policy = attr->sched_policy;
|
||||
int retval, oldprio, newprio, queued, running;
|
||||
const struct sched_class *prev_class;
|
||||
const struct sched_class *prev_class, *next_class;
|
||||
struct balance_callback *head;
|
||||
struct rq_flags rf;
|
||||
int reset_on_fork;
|
||||
@@ -706,6 +706,12 @@ int __sched_setscheduler(struct task_struct *p,
|
||||
queue_flags &= ~DEQUEUE_MOVE;
|
||||
}
|
||||
|
||||
prev_class = p->sched_class;
|
||||
next_class = __setscheduler_class(p, newprio);
|
||||
|
||||
if (prev_class != next_class && p->se.sched_delayed)
|
||||
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
@@ -713,11 +719,10 @@ int __sched_setscheduler(struct task_struct *p,
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
prev_class = p->sched_class;
|
||||
|
||||
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
|
||||
__setscheduler_params(p, attr);
|
||||
__setscheduler_prio(p, newprio);
|
||||
p->sched_class = next_class;
|
||||
p->prio = newprio;
|
||||
}
|
||||
__setscheduler_uclamp(p, attr);
|
||||
check_class_changing(rq, p, prev_class);
|
||||
|
||||
@@ -55,15 +55,26 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
|
||||
enum task_work_notify_mode notify)
|
||||
{
|
||||
struct callback_head *head;
|
||||
int flags = notify & TWA_FLAGS;
|
||||
|
||||
notify &= ~TWA_FLAGS;
|
||||
if (notify == TWA_NMI_CURRENT) {
|
||||
if (WARN_ON_ONCE(task != current))
|
||||
return -EINVAL;
|
||||
if (!IS_ENABLED(CONFIG_IRQ_WORK))
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* record the work call stack in order to print it in KASAN reports */
|
||||
kasan_record_aux_stack(work);
|
||||
/*
|
||||
* Record the work call stack in order to print it in KASAN
|
||||
* reports.
|
||||
*
|
||||
* Note that stack allocation can fail if TWAF_NO_ALLOC flag
|
||||
* is set and new page is needed to expand the stack buffer.
|
||||
*/
|
||||
if (flags & TWAF_NO_ALLOC)
|
||||
kasan_record_aux_stack_noalloc(work);
|
||||
else
|
||||
kasan_record_aux_stack(work);
|
||||
}
|
||||
|
||||
head = READ_ONCE(task->task_works);
|
||||
|
||||
@@ -309,6 +309,9 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts)
|
||||
struct posix_clock_desc cd;
|
||||
int err;
|
||||
|
||||
if (!timespec64_valid_strict(ts))
|
||||
return -EINVAL;
|
||||
|
||||
err = get_clock_desc(id, &cd);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@@ -434,6 +434,12 @@ static void tick_nohz_kick_task(struct task_struct *tsk)
|
||||
* smp_mb__after_spin_lock()
|
||||
* tick_nohz_task_switch()
|
||||
* LOAD p->tick_dep_mask
|
||||
*
|
||||
* XXX given a task picks up the dependency on schedule(), should we
|
||||
* only care about tasks that are currently on the CPU instead of all
|
||||
* that are on the runqueue?
|
||||
*
|
||||
* That is, does this want to be: task_on_cpu() / task_curr()?
|
||||
*/
|
||||
if (!sched_task_on_rq(tsk))
|
||||
return;
|
||||
|
||||
@@ -1226,7 +1226,7 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
|
||||
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
|
||||
.arg3_size = sizeof(u64),
|
||||
};
|
||||
|
||||
@@ -1243,7 +1243,7 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
|
||||
.func = get_func_ret,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
|
||||
.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
|
||||
.arg2_size = sizeof(u64),
|
||||
};
|
||||
|
||||
@@ -2240,8 +2240,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
|
||||
|
||||
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
|
||||
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
|
||||
if (ret == -ENOENT)
|
||||
goto unlock;
|
||||
if (ret < 0) {
|
||||
bpf_prog_array_delete_safe(old_array, event->prog);
|
||||
} else {
|
||||
@@ -3157,7 +3155,8 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
|
||||
struct bpf_uprobe_multi_link *umulti_link;
|
||||
u32 ucount = info->uprobe_multi.count;
|
||||
int err = 0, i;
|
||||
long left;
|
||||
char *p, *buf;
|
||||
long left = 0;
|
||||
|
||||
if (!upath ^ !upath_size)
|
||||
return -EINVAL;
|
||||
@@ -3171,26 +3170,23 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
|
||||
info->uprobe_multi.pid = umulti_link->task ?
|
||||
task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
|
||||
|
||||
if (upath) {
|
||||
char *p, *buf;
|
||||
|
||||
upath_size = min_t(u32, upath_size, PATH_MAX);
|
||||
|
||||
buf = kmalloc(upath_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
p = d_path(&umulti_link->path, buf, upath_size);
|
||||
if (IS_ERR(p)) {
|
||||
kfree(buf);
|
||||
return PTR_ERR(p);
|
||||
}
|
||||
upath_size = buf + upath_size - p;
|
||||
left = copy_to_user(upath, p, upath_size);
|
||||
upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
|
||||
buf = kmalloc(upath_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
p = d_path(&umulti_link->path, buf, upath_size);
|
||||
if (IS_ERR(p)) {
|
||||
kfree(buf);
|
||||
if (left)
|
||||
return -EFAULT;
|
||||
info->uprobe_multi.path_size = upath_size;
|
||||
return PTR_ERR(p);
|
||||
}
|
||||
upath_size = buf + upath_size - p;
|
||||
|
||||
if (upath)
|
||||
left = copy_to_user(upath, p, upath_size);
|
||||
kfree(buf);
|
||||
if (left)
|
||||
return -EFAULT;
|
||||
info->uprobe_multi.path_size = upath_size;
|
||||
|
||||
if (!uoffsets && !ucookies && !uref_ctr_offsets)
|
||||
return 0;
|
||||
|
||||
@@ -1160,19 +1160,14 @@ void fgraph_update_pid_func(void)
|
||||
static int start_graph_tracing(void)
|
||||
{
|
||||
unsigned long **ret_stack_list;
|
||||
int ret, cpu;
|
||||
int ret;
|
||||
|
||||
ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
|
||||
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
|
||||
sizeof(*ret_stack_list), GFP_KERNEL);
|
||||
|
||||
if (!ret_stack_list)
|
||||
return -ENOMEM;
|
||||
|
||||
/* The cpu_boot init_task->ret_stack will never be freed */
|
||||
for_each_online_cpu(cpu) {
|
||||
if (!idle_task(cpu)->ret_stack)
|
||||
ftrace_graph_init_idle_task(idle_task(cpu), cpu);
|
||||
}
|
||||
|
||||
do {
|
||||
ret = alloc_retstack_tasklist(ret_stack_list);
|
||||
} while (ret == -EAGAIN);
|
||||
@@ -1242,14 +1237,34 @@ static void ftrace_graph_disable_direct(bool disable_branch)
|
||||
fgraph_direct_gops = &fgraph_stub;
|
||||
}
|
||||
|
||||
/* The cpu_boot init_task->ret_stack will never be freed */
|
||||
static int fgraph_cpu_init(unsigned int cpu)
|
||||
{
|
||||
if (!idle_task(cpu)->ret_stack)
|
||||
ftrace_graph_init_idle_task(idle_task(cpu), cpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int register_ftrace_graph(struct fgraph_ops *gops)
|
||||
{
|
||||
static bool fgraph_initialized;
|
||||
int command = 0;
|
||||
int ret = 0;
|
||||
int i = -1;
|
||||
|
||||
mutex_lock(&ftrace_lock);
|
||||
|
||||
if (!fgraph_initialized) {
|
||||
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init",
|
||||
fgraph_cpu_init, NULL);
|
||||
if (ret < 0) {
|
||||
pr_warn("fgraph: Error to init cpu hotplug support\n");
|
||||
return ret;
|
||||
}
|
||||
fgraph_initialized = true;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (!fgraph_array[0]) {
|
||||
/* The array must always have real data on it */
|
||||
for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
|
||||
|
||||
@@ -2337,9 +2337,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
|
||||
if (!buffer->buffers[cpu])
|
||||
goto fail_free_buffers;
|
||||
|
||||
ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
|
||||
if (ret < 0)
|
||||
goto fail_free_buffers;
|
||||
/* If already mapped, do not hook to CPU hotplug */
|
||||
if (!start) {
|
||||
ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
|
||||
if (ret < 0)
|
||||
goto fail_free_buffers;
|
||||
}
|
||||
|
||||
mutex_init(&buffer->mutex);
|
||||
|
||||
@@ -6725,39 +6728,38 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
|
||||
}
|
||||
|
||||
for_each_buffer_cpu(buffer, cpu) {
|
||||
struct buffer_data_page *old_free_data_page;
|
||||
struct list_head old_pages;
|
||||
unsigned long flags;
|
||||
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask))
|
||||
continue;
|
||||
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
|
||||
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
|
||||
|
||||
/* Clear the head bit to make the link list normal to read */
|
||||
rb_head_page_deactivate(cpu_buffer);
|
||||
|
||||
/* Now walk the list and free all the old sub buffers */
|
||||
list_for_each_entry_safe(bpage, tmp, cpu_buffer->pages, list) {
|
||||
list_del_init(&bpage->list);
|
||||
free_buffer_page(bpage);
|
||||
}
|
||||
/* The above loop stopped an the last page needing to be freed */
|
||||
bpage = list_entry(cpu_buffer->pages, struct buffer_page, list);
|
||||
free_buffer_page(bpage);
|
||||
|
||||
/* Free the current reader page */
|
||||
free_buffer_page(cpu_buffer->reader_page);
|
||||
/*
|
||||
* Collect buffers from the cpu_buffer pages list and the
|
||||
* reader_page on old_pages, so they can be freed later when not
|
||||
* under a spinlock. The pages list is a linked list with no
|
||||
* head, adding old_pages turns it into a regular list with
|
||||
* old_pages being the head.
|
||||
*/
|
||||
list_add(&old_pages, cpu_buffer->pages);
|
||||
list_add(&cpu_buffer->reader_page->list, &old_pages);
|
||||
|
||||
/* One page was allocated for the reader page */
|
||||
cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next,
|
||||
struct buffer_page, list);
|
||||
list_del_init(&cpu_buffer->reader_page->list);
|
||||
|
||||
/* The cpu_buffer pages are a link list with no head */
|
||||
/* Install the new pages, remove the head from the list */
|
||||
cpu_buffer->pages = cpu_buffer->new_pages.next;
|
||||
cpu_buffer->new_pages.next->prev = cpu_buffer->new_pages.prev;
|
||||
cpu_buffer->new_pages.prev->next = cpu_buffer->new_pages.next;
|
||||
|
||||
/* Clear the new_pages list */
|
||||
INIT_LIST_HEAD(&cpu_buffer->new_pages);
|
||||
list_del_init(&cpu_buffer->new_pages);
|
||||
|
||||
cpu_buffer->head_page
|
||||
= list_entry(cpu_buffer->pages, struct buffer_page, list);
|
||||
@@ -6766,11 +6768,20 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
|
||||
cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update;
|
||||
cpu_buffer->nr_pages_to_update = 0;
|
||||
|
||||
free_pages((unsigned long)cpu_buffer->free_page, old_order);
|
||||
old_free_data_page = cpu_buffer->free_page;
|
||||
cpu_buffer->free_page = NULL;
|
||||
|
||||
rb_head_page_activate(cpu_buffer);
|
||||
|
||||
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
|
||||
|
||||
/* Free old sub buffers */
|
||||
list_for_each_entry_safe(bpage, tmp, &old_pages, list) {
|
||||
list_del_init(&bpage->list);
|
||||
free_buffer_page(bpage);
|
||||
}
|
||||
free_pages((unsigned long)old_free_data_page, old_order);
|
||||
|
||||
rb_check_pages(cpu_buffer);
|
||||
}
|
||||
|
||||
|
||||
@@ -3697,8 +3697,8 @@ static void test_can_verify(void)
|
||||
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
|
||||
va_list ap)
|
||||
{
|
||||
long text_delta = iter->tr->text_delta;
|
||||
long data_delta = iter->tr->data_delta;
|
||||
long text_delta = 0;
|
||||
long data_delta = 0;
|
||||
const char *p = fmt;
|
||||
const char *str;
|
||||
bool good;
|
||||
@@ -3710,6 +3710,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
|
||||
if (static_branch_unlikely(&trace_no_verify))
|
||||
goto print;
|
||||
|
||||
/*
|
||||
* When the kernel is booted with the tp_printk command line
|
||||
* parameter, trace events go directly through to printk().
|
||||
* It also is checked by this function, but it does not
|
||||
* have an associated trace_array (tr) for it.
|
||||
*/
|
||||
if (iter->tr) {
|
||||
text_delta = iter->tr->text_delta;
|
||||
data_delta = iter->tr->data_delta;
|
||||
}
|
||||
|
||||
/* Don't bother checking when doing a ftrace_dump() */
|
||||
if (iter->fmt == static_fmt_buf)
|
||||
goto print;
|
||||
@@ -10610,10 +10621,10 @@ __init static void enable_instances(void)
|
||||
* cannot be deleted by user space, so keep the reference
|
||||
* to it.
|
||||
*/
|
||||
if (start)
|
||||
if (start) {
|
||||
tr->flags |= TRACE_ARRAY_FL_BOOT;
|
||||
else
|
||||
trace_array_put(tr);
|
||||
tr->ref++;
|
||||
}
|
||||
|
||||
while ((tok = strsep(&curr_str, ","))) {
|
||||
early_enable_events(tr, tok, true);
|
||||
|
||||
@@ -912,6 +912,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
if (argc - 2 > MAX_TRACE_ARGS) {
|
||||
ret = -E2BIG;
|
||||
goto error;
|
||||
}
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
event_call = find_and_get_event(sys_name, sys_event);
|
||||
ep = alloc_event_probe(group, event, event_call, argc - 2);
|
||||
@@ -937,7 +942,7 @@ static int __trace_eprobe_create(int argc, const char *argv[])
|
||||
|
||||
argc -= 2; argv += 2;
|
||||
/* parse arguments */
|
||||
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
|
||||
for (i = 0; i < argc; i++) {
|
||||
trace_probe_log_set_index(i + 2);
|
||||
ret = trace_eprobe_tp_update_arg(ep, argv, i);
|
||||
if (ret)
|
||||
|
||||
@@ -1187,6 +1187,10 @@ static int __trace_fprobe_create(int argc, const char *argv[])
|
||||
argc = new_argc;
|
||||
argv = new_argv;
|
||||
}
|
||||
if (argc > MAX_TRACE_ARGS) {
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
|
||||
if (ret)
|
||||
@@ -1203,7 +1207,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
|
||||
}
|
||||
|
||||
/* parse arguments */
|
||||
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
|
||||
for (i = 0; i < argc; i++) {
|
||||
trace_probe_log_set_index(i + 2);
|
||||
ctx.offset = 0;
|
||||
ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
|
||||
|
||||
@@ -520,6 +520,8 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
|
||||
if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
|
||||
goto out_unlock;
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
goto out_unlock;
|
||||
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
|
||||
goto out_unlock;
|
||||
|
||||
|
||||
@@ -1013,6 +1013,10 @@ static int __trace_kprobe_create(int argc, const char *argv[])
|
||||
argc = new_argc;
|
||||
argv = new_argv;
|
||||
}
|
||||
if (argc > MAX_TRACE_ARGS) {
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
|
||||
if (ret)
|
||||
@@ -1029,7 +1033,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
|
||||
}
|
||||
|
||||
/* parse arguments */
|
||||
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
|
||||
for (i = 0; i < argc; i++) {
|
||||
trace_probe_log_set_index(i + 2);
|
||||
ctx.offset = 0;
|
||||
ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], &ctx);
|
||||
|
||||
@@ -1953,12 +1953,8 @@ static void stop_kthread(unsigned int cpu)
|
||||
{
|
||||
struct task_struct *kthread;
|
||||
|
||||
mutex_lock(&interface_lock);
|
||||
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
|
||||
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
|
||||
if (kthread) {
|
||||
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
|
||||
mutex_unlock(&interface_lock);
|
||||
|
||||
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
|
||||
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
|
||||
kthread_stop(kthread);
|
||||
@@ -1972,7 +1968,6 @@ static void stop_kthread(unsigned int cpu)
|
||||
put_task_struct(kthread);
|
||||
}
|
||||
} else {
|
||||
mutex_unlock(&interface_lock);
|
||||
/* if no workload, just return */
|
||||
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
|
||||
/*
|
||||
@@ -1994,8 +1989,12 @@ static void stop_per_cpu_kthreads(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
cpus_read_lock();
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
stop_kthread(cpu);
|
||||
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2007,6 +2006,10 @@ static int start_kthread(unsigned int cpu)
|
||||
void *main = osnoise_main;
|
||||
char comm[24];
|
||||
|
||||
/* Do not start a new thread if it is already running */
|
||||
if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
|
||||
return 0;
|
||||
|
||||
if (timerlat_enabled()) {
|
||||
snprintf(comm, 24, "timerlat/%d", cpu);
|
||||
main = timerlat_main;
|
||||
@@ -2061,11 +2064,10 @@ static int start_per_cpu_kthreads(void)
|
||||
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
|
||||
struct task_struct *kthread;
|
||||
|
||||
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
|
||||
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
|
||||
if (!WARN_ON(!kthread))
|
||||
kthread_stop(kthread);
|
||||
}
|
||||
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, current_mask) {
|
||||
@@ -2095,6 +2097,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
|
||||
mutex_lock(&interface_lock);
|
||||
cpus_read_lock();
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
goto out_unlock;
|
||||
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
|
||||
goto out_unlock;
|
||||
|
||||
|
||||
@@ -276,7 +276,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
|
||||
}
|
||||
trace_probe_log_err(offset, NO_EVENT_NAME);
|
||||
return -EINVAL;
|
||||
} else if (len > MAX_EVENT_NAME_LEN) {
|
||||
} else if (len >= MAX_EVENT_NAME_LEN) {
|
||||
trace_probe_log_err(offset, EVENT_TOO_LONG);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -1485,7 +1485,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
|
||||
/* reset the max latency */
|
||||
tr->max_latency = 0;
|
||||
|
||||
while (p->on_rq) {
|
||||
while (task_is_runnable(p)) {
|
||||
/*
|
||||
* Sleep to make sure the -deadline thread is asleep too.
|
||||
* On virtual machines we can't rely on timings,
|
||||
|
||||
@@ -565,6 +565,8 @@ static int __trace_uprobe_create(int argc, const char **argv)
|
||||
|
||||
if (argc < 2)
|
||||
return -ECANCELED;
|
||||
if (argc - 2 > MAX_TRACE_ARGS)
|
||||
return -E2BIG;
|
||||
|
||||
if (argv[0][1] == ':')
|
||||
event = &argv[0][2];
|
||||
@@ -690,7 +692,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
|
||||
tu->filename = filename;
|
||||
|
||||
/* parse arguments */
|
||||
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
|
||||
for (i = 0; i < argc; i++) {
|
||||
struct traceprobe_parse_context ctx = {
|
||||
.flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER,
|
||||
};
|
||||
@@ -875,6 +877,7 @@ struct uprobe_cpu_buffer {
|
||||
};
|
||||
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
|
||||
static int uprobe_buffer_refcnt;
|
||||
#define MAX_UCB_BUFFER_SIZE PAGE_SIZE
|
||||
|
||||
static int uprobe_buffer_init(void)
|
||||
{
|
||||
@@ -979,6 +982,11 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
|
||||
ucb = uprobe_buffer_get();
|
||||
ucb->dsize = tu->tp.size + dsize;
|
||||
|
||||
if (WARN_ON_ONCE(ucb->dsize > MAX_UCB_BUFFER_SIZE)) {
|
||||
ucb->dsize = MAX_UCB_BUFFER_SIZE;
|
||||
dsize = MAX_UCB_BUFFER_SIZE - tu->tp.size;
|
||||
}
|
||||
|
||||
store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
|
||||
|
||||
*ucbp = ucb;
|
||||
@@ -998,9 +1006,6 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
|
||||
|
||||
WARN_ON(call != trace_file->event_call);
|
||||
|
||||
if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
|
||||
return;
|
||||
|
||||
if (trace_trigger_soft_disabled(trace_file))
|
||||
return;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user