diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8674a5fa5437..9db5002a2f4b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -27,6 +27,16 @@ struct scx_sched __rcu *scx_root;
  */
 static LIST_HEAD(scx_sched_all);
 
+#ifdef CONFIG_EXT_SUB_SCHED
+static const struct rhashtable_params scx_sched_hash_params = {
+	.key_len	= sizeof_field(struct scx_sched, ops.sub_cgroup_id),
+	.key_offset	= offsetof(struct scx_sched, ops.sub_cgroup_id),
+	.head_offset	= offsetof(struct scx_sched, hash_node),
+};
+
+static struct rhashtable scx_sched_hash;
+#endif
+
 /*
  * During exit, a task may schedule after losing its PIDs. When disabling the
  * BPF scheduler, we need to be able to iterate tasks in every state to
@@ -287,6 +297,12 @@ static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos,
 	return NULL;
 }
 
+static struct scx_sched *scx_find_sub_sched(u64 cgroup_id)
+{
+	return rhashtable_lookup(&scx_sched_hash, &cgroup_id,
+				 scx_sched_hash_params);
+}
+
 static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
 {
 	rcu_assign_pointer(p->scx.sched, sch);
@@ -294,6 +310,7 @@ static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
 #else	/* CONFIG_EXT_SUB_SCHED */
 static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
 static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos,
 						 struct scx_sched *root) { return pos ? NULL : root; }
+static struct scx_sched *scx_find_sub_sched(u64 cgroup_id) { return NULL; }
 static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {}
 #endif	/* CONFIG_EXT_SUB_SCHED */
@@ -4830,26 +4847,41 @@ static void refresh_watchdog(void)
 		cancel_delayed_work_sync(&scx_watchdog_work);
 }
 
-static void scx_link_sched(struct scx_sched *sch)
+static s32 scx_link_sched(struct scx_sched *sch)
 {
 	scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
 #ifdef CONFIG_EXT_SUB_SCHED
 		struct scx_sched *parent = scx_parent(sch);
-		if (parent)
+		s32 ret;
+
+		if (parent) {
+			ret = rhashtable_lookup_insert_fast(&scx_sched_hash,
+					&sch->hash_node, scx_sched_hash_params);
+			if (ret) {
+				scx_error(sch, "failed to insert into scx_sched_hash (%d)", ret);
+				return ret;
+			}
+
 			list_add_tail(&sch->sibling, &parent->children);
+		}
 #endif	/* CONFIG_EXT_SUB_SCHED */
+
 		list_add_tail_rcu(&sch->all, &scx_sched_all);
 	}
 
 	refresh_watchdog();
+	return 0;
 }
 
 static void scx_unlink_sched(struct scx_sched *sch)
 {
 	scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
 #ifdef CONFIG_EXT_SUB_SCHED
-		if (scx_parent(sch))
+		if (scx_parent(sch)) {
+			rhashtable_remove_fast(&scx_sched_hash, &sch->hash_node,
+					       scx_sched_hash_params);
 			list_del_init(&sch->sibling);
+		}
 #endif	/* CONFIG_EXT_SUB_SCHED */
 		list_del_rcu(&sch->all);
 	}
@@ -5906,7 +5938,9 @@ static void scx_root_enable_workfn(struct kthread_work *work)
 	 */
 	rcu_assign_pointer(scx_root, sch);
 
-	scx_link_sched(sch);
+	ret = scx_link_sched(sch);
+	if (ret)
+		goto err_disable;
 
 	scx_idle_enable(ops);
 
@@ -6171,7 +6205,9 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
 		goto out_put_cgrp;
 	}
 
-	scx_link_sched(sch);
+	ret = scx_link_sched(sch);
+	if (ret)
+		goto err_disable;
 
 	if (sch->level >= SCX_SUB_MAX_DEPTH) {
 		scx_error(sch, "max nesting depth %d violated",
@@ -6999,6 +7035,10 @@ void __init init_sched_ext_class(void)
 	register_sysrq_key('S', &sysrq_sched_ext_reset_op);
 	register_sysrq_key('D', &sysrq_sched_ext_dump_op);
 	INIT_DELAYED_WORK(&scx_watchdog_work, scx_watchdog_workfn);
+
+#ifdef CONFIG_EXT_SUB_SCHED
+	BUG_ON(rhashtable_init(&scx_sched_hash, &scx_sched_hash_params));
+#endif	/* CONFIG_EXT_SUB_SCHED */
 }
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index aac051e27f7f..4cb97093b872 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1014,6 +1014,8 @@ struct scx_sched {
 	struct list_head	all;
 
 #ifdef CONFIG_EXT_SUB_SCHED
+	struct rhash_head	hash_node;
+
 	struct list_head	children;
 	struct list_head	sibling;
 	struct cgroup		*cgrp;
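
A note on the rhashtable API used above, for reviewers less familiar with it:
rhashtable_lookup() may only be called under rcu_read_lock(), so callers of
scx_find_sub_sched() need to be in an RCU read-side critical section, and
rhashtable_lookup_insert_fast() returns a negative errno (e.g. -EEXIST when the
key is already present), which is what makes the error path in scx_link_sched()
reachable if two sub-schedulers claim the same sub_cgroup_id. A minimal,
self-contained sketch of the same pattern follows; struct foo and its helpers
are hypothetical and not part of this patch:

	#include <linux/rhashtable.h>
	#include <linux/rcupdate.h>

	/* Hypothetical object keyed by a u64 id, mirroring struct scx_sched. */
	struct foo {
		u64			id;
		struct rhash_head	hash_node;
	};

	static const struct rhashtable_params foo_hash_params = {
		.key_len	= sizeof_field(struct foo, id),
		.key_offset	= offsetof(struct foo, id),
		.head_offset	= offsetof(struct foo, hash_node),
	};

	static struct rhashtable foo_hash;	/* rhashtable_init() before use */

	/* Lookup is RCU-based; must run under rcu_read_lock(). */
	static struct foo *foo_find(u64 id)
	{
		return rhashtable_lookup(&foo_hash, &id, foo_hash_params);
	}

	/* Returns 0 on success, -EEXIST if the key is already present. */
	static int foo_add(struct foo *f)
	{
		return rhashtable_lookup_insert_fast(&foo_hash, &f->hash_node,
						     foo_hash_params);
	}

	static void foo_del(struct foo *f)
	{
		rhashtable_remove_fast(&foo_hash, &f->hash_node, foo_hash_params);
	}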