Merge tag 'sched_ext-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext updates from Tejun Heo:

 - Add a mechanism to count and report internal events. This significantly
   improves visibility into subtle corner conditions.

 - The default idle CPU selection logic is revamped and improved in multiple
   ways, including being made topology aware.

 - sched_ext was disabling ttwu_queue for simplicity, which can be costly
   when the hardware topology is more complex. Implement
   SCX_OPS_ALLOW_QUEUED_WAKEUP so that BPF schedulers can selectively enable
   ttwu_queue.

 - tools/sched_ext updates to improve compatibility, among others.

 - Other misc updates and fixes.

 - sched_ext/for-6.14-fixes was pulled a few times to receive prerequisite
   fixes and resolve conflicts.

* tag 'sched_ext-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (42 commits)
  sched_ext: idle: Refactor scx_select_cpu_dfl()
  sched_ext: idle: Honor idle flags in the built-in idle selection policy
  sched_ext: Skip per-CPU tasks in scx_bpf_reenqueue_local()
  sched_ext: Add trace point to track sched_ext core events
  sched_ext: Change the event type from u64 to s64
  sched_ext: Documentation: add task lifecycle summary
  tools/sched_ext: Provide a compatible helper for scx_bpf_events()
  selftests/sched_ext: Add NUMA-aware scheduler test
  tools/sched_ext: Provide consistent access to scx flags
  sched_ext: idle: Fix scx_bpf_pick_any_cpu_node() behavior
  sched_ext: idle: Introduce scx_bpf_nr_node_ids()
  sched_ext: idle: Introduce node-aware idle cpu kfunc helpers
  sched_ext: idle: Per-node idle cpumasks
  sched_ext: idle: Introduce SCX_OPS_BUILTIN_IDLE_PER_NODE
  sched_ext: idle: Make idle static keys private
  sched/topology: Introduce for_each_node_numadist() iterator
  mm/numa: Introduce nearest_node_nodemask()
  nodemask: numa: reorganize inclusion path
  nodemask: add nodes_copy()
  tools/sched_ext: Sync with scx repo
  ...
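As a rough, illustrative sketch only (the scheduler name and callbacks below are placeholders, not part of this pull), a BPF scheduler built on this release could opt back into the ttwu_queue optimization by setting the new ops flag in its struct_ops definition:

	/* Hypothetical minimal scheduler opting into queued wakeups. */
	SEC(".struct_ops.link")
	struct sched_ext_ops example_ops = {
		.enqueue	= (void *)example_enqueue,	/* placeholder callbacks */
		.dispatch	= (void *)example_dispatch,
		.flags		= SCX_OPS_ALLOW_QUEUED_WAKEUP,
		.name		= "example",
	};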
@@ -294,6 +294,42 @@ dispatching, and must be dispatched to with ``scx_bpf_dsq_insert()``. See
the function documentation and usage in ``tools/sched_ext/scx_simple.bpf.c``
for more information.

Task Lifecycle
--------------

The following pseudo-code summarizes the entire lifecycle of a task managed
by a sched_ext scheduler:

.. code-block:: c

    ops.init_task();            /* A new task is created */
    ops.enable();               /* Enable BPF scheduling for the task */

    while (task in SCHED_EXT) {
            if (task can migrate)
                    ops.select_cpu();   /* Called on wakeup (optimization) */

            ops.runnable();     /* Task becomes ready to run */

            while (task is runnable) {
                    if (task is not in a DSQ) {
                            ops.enqueue();  /* Task can be added to a DSQ */

                            /* A CPU becomes available */

                            ops.dispatch(); /* Task is moved to a local DSQ */
                    }
                    ops.running();  /* Task starts running on its assigned CPU */
                    ops.tick();     /* Called every 1/HZ seconds */
                    ops.stopping(); /* Task stops running (time slice expires or wait) */
            }

            ops.quiescent();    /* Task releases its assigned CPU (wait) */
    }

    ops.disable();              /* Disable BPF scheduling for the task */
    ops.exit_task();            /* Task is destroyed */

Where to Look
=============
@@ -21196,8 +21196,7 @@ S:	Maintained
W:	https://github.com/sched-ext/scx
T:	git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
F:	include/linux/sched/ext.h
F:	kernel/sched/ext.h
F:	kernel/sched/ext.c
F:	kernel/sched/ext*
F:	tools/sched_ext/
F:	tools/testing/selftests/sched_ext
@@ -94,7 +94,6 @@
#include <linux/bitmap.h>
#include <linux/minmax.h>
#include <linux/nodemask_types.h>
#include <linux/numa.h>
#include <linux/random.h>

extern nodemask_t _unused_nodemask_arg_;

@@ -191,6 +190,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s
	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
}

#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES)
static __always_inline void __nodes_copy(nodemask_t *dstp,
					const nodemask_t *srcp, unsigned int nbits)
{
	bitmap_copy(dstp->bits, srcp->bits, nbits);
}

#define nodes_complement(dst, src) \
			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
static __always_inline void __nodes_complement(nodemask_t *dstp,
@@ -3,7 +3,16 @@
#define __LINUX_NODEMASK_TYPES_H

#include <linux/bitops.h>
#include <linux/numa.h>

#ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT	CONFIG_NODES_SHIFT
#else
#define NODES_SHIFT	0
#endif

#define MAX_NUMNODES	(1 << NODES_SHIFT)

#define NUMA_NO_NODE	(-1)

typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
@@ -3,16 +3,8 @@
#define _LINUX_NUMA_H
#include <linux/init.h>
#include <linux/types.h>
#include <linux/nodemask.h>

#ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT	CONFIG_NODES_SHIFT
#else
#define NODES_SHIFT	0
#endif

#define MAX_NUMNODES	(1 << NODES_SHIFT)

#define NUMA_NO_NODE	(-1)
#define NUMA_NO_MEMBLK	(-1)

static inline bool numa_valid_node(int nid)

@@ -39,6 +31,8 @@ void __init alloc_offline_node_data(int nid);
/* Generic implementation available */
int numa_nearest_node(int node, unsigned int state);

int nearest_node_nodemask(int node, nodemask_t *mask);

#ifndef memory_add_physaddr_to_nid
int memory_add_physaddr_to_nid(u64 start);
#endif

@@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state)
	return NUMA_NO_NODE;
}

static inline int nearest_node_nodemask(int node, nodemask_t *mask)
{
	return NUMA_NO_NODE;
}

static inline int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
@@ -146,6 +146,7 @@ struct sched_ext_entity {
	u32			weight;
	s32			sticky_cpu;
	s32			holding_cpu;
	s32			selected_cpu;
	u32			kf_mask;	/* see scx_kf_mask above */
	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
	atomic_long_t		ops_state;
@@ -261,6 +261,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops)
}
#endif	/* CONFIG_NUMA */

/**
 * for_each_node_numadist() - iterate over nodes in increasing distance
 *			      order, starting from a given node
 * @node: the iteration variable and the starting node.
 * @unvisited: a nodemask to keep track of the unvisited nodes.
 *
 * This macro iterates over NUMA node IDs in increasing distance from the
 * starting @node and yields MAX_NUMNODES when all the nodes have been
 * visited.
 *
 * Note that by the time the loop completes, the @unvisited nodemask will
 * be fully cleared, unless the loop exits early.
 *
 * The difference between for_each_node() and for_each_node_numadist() is
 * that the former allows to iterate over nodes in numerical order, whereas
 * the latter iterates over nodes in increasing order of distance.
 *
 * This complexity of this iterator is O(N^2), where N represents the
 * number of nodes, as each iteration involves scanning all nodes to
 * find the one with the shortest distance.
 *
 * Requires rcu_lock to be held.
 */
#define for_each_node_numadist(node, unvisited)				\
	for (int __start = (node),					\
	     (node) = nearest_node_nodemask((__start), &(unvisited));	\
	     (node) < MAX_NUMNODES;					\
	     node_clear((node), (unvisited)),				\
	     (node) = nearest_node_nodemask((__start), &(unvisited)))

/**
 * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
 *                          from a given node.
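As a usage illustration only (not part of this diff; the starting node and the choice of node_states[N_ONLINE] as the candidate set are assumptions), the new iterator is meant to be used along these lines:

	nodemask_t unvisited;
	int node = numa_node_id();	/* hypothetical starting node */

	/* Visit every online node, closest first. */
	nodes_copy(unvisited, node_states[N_ONLINE]);

	rcu_read_lock();
	for_each_node_numadist(node, unvisited)
		pr_debug("next closest node: %d\n", node);
	rcu_read_unlock();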
@@ -26,6 +26,25 @@ TRACE_EVENT(sched_ext_dump,
	)
);

TRACE_EVENT(sched_ext_event,
	TP_PROTO(const char *name, __s64 delta),
	TP_ARGS(name, delta),

	TP_STRUCT__entry(
		__string(name, name)
		__field(	__s64,		delta		)
	),

	TP_fast_assign(
		__assign_str(name);
		__entry->delta = delta;
	),

	TP_printk("name %s delta %lld",
		  __get_str(name), __entry->delta
	)
);

#endif /* _TRACE_SCHED_EXT_H */

/* This part must be outside protection */
@@ -61,6 +61,7 @@

#ifdef CONFIG_SCHED_CLASS_EXT
# include "ext.c"
# include "ext_idle.c"
#endif

#include "syscalls.c"
@@ -3922,13 +3922,8 @@ bool cpus_share_resources(int this_cpu, int that_cpu)
 
 static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
 {
-	/*
-	 * The BPF scheduler may depend on select_task_rq() being invoked during
-	 * wakeups. In addition, @p may end up executing on a different CPU
-	 * regardless of what happens in the wakeup path making the ttwu_queue
-	 * optimization less meaningful. Skip if on SCX.
-	 */
-	if (task_on_scx(p))
+	/* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */
+	if (!scx_allow_ttwu_queue(p))
 		return false;
 
 	/*
kernel/sched/ext.c: 1085 lines changed (diff too large to show)
@@ -8,6 +8,8 @@
 */
#ifdef CONFIG_SCHED_CLASS_EXT

DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);

void scx_tick(struct rq *rq);
void init_scx_entity(struct sched_ext_entity *scx);
void scx_pre_fork(struct task_struct *p);

@@ -34,6 +36,13 @@ static inline bool task_on_scx(const struct task_struct *p)
	return scx_enabled() && p->sched_class == &ext_sched_class;
}

static inline bool scx_allow_ttwu_queue(const struct task_struct *p)
{
	return !scx_enabled() ||
		static_branch_likely(&scx_ops_allow_queued_wakeup) ||
		p->sched_class != &ext_sched_class;
}

#ifdef CONFIG_SCHED_CORE
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
		   bool in_fi);

@@ -52,6 +61,7 @@ static inline void scx_rq_activate(struct rq *rq) {}
static inline void scx_rq_deactivate(struct rq *rq) {}
static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
static inline bool task_on_scx(const struct task_struct *p) { return false; }
static inline bool scx_allow_ttwu_queue(const struct task_struct *p) { return true; }
static inline void init_sched_ext_class(void) {}

#endif	/* CONFIG_SCHED_CLASS_EXT */
kernel/sched/ext_idle.c: new file, 1171 lines (diff too large to show)
kernel/sched/ext_idle.h: new file, 35 lines
@@ -0,0 +1,35 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 * Copyright (c) 2024 Andrea Righi <arighi@nvidia.com>
 */
#ifndef _KERNEL_SCHED_EXT_IDLE_H
#define _KERNEL_SCHED_EXT_IDLE_H

struct sched_ext_ops;

#ifdef CONFIG_SMP
void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);
void scx_idle_init_masks(void);
bool scx_idle_test_and_clear_cpu(int cpu);
s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags);
#else /* !CONFIG_SMP */
static inline void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) {}
static inline void scx_idle_init_masks(void) {}
static inline bool scx_idle_test_and_clear_cpu(int cpu) { return false; }
static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
{
	return -EBUSY;
}
#endif /* CONFIG_SMP */

s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
void scx_idle_enable(struct sched_ext_ops *ops);
void scx_idle_disable(void);
int scx_idle_init(void);

#endif /* _KERNEL_SCHED_EXT_IDLE_H */
@@ -196,6 +196,37 @@ int numa_nearest_node(int node, unsigned int state)
}
EXPORT_SYMBOL_GPL(numa_nearest_node);

/**
 * nearest_node_nodemask - Find the node in @mask at the nearest distance
 *			   from @node.
 *
 * @node: a valid node ID to start the search from.
 * @mask: a pointer to a nodemask representing the allowed nodes.
 *
 * This function iterates over all nodes in @mask and calculates the
 * distance from the starting @node, then it returns the node ID that is
 * the closest to @node, or MAX_NUMNODES if no node is found.
 *
 * Note that @node must be a valid node ID usable with node_distance(),
 * providing an invalid node ID (e.g., NUMA_NO_NODE) may result in crashes
 * or unexpected behavior.
 */
int nearest_node_nodemask(int node, nodemask_t *mask)
{
	int dist, n, min_dist = INT_MAX, min_node = MAX_NUMNODES;

	for_each_node_mask(n, *mask) {
		dist = node_distance(node, n);
		if (dist < min_dist) {
			min_dist = dist;
			min_node = n;
		}
	}

	return min_node;
}
EXPORT_SYMBOL_GPL(nearest_node_nodemask);

struct mempolicy *get_task_policy(struct task_struct *p)
{
	struct mempolicy *pol = p->mempolicy;
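As a small, hypothetical usage sketch (not from this pull; picking node_states[N_MEMORY] as the candidate set and 'nid' as the reference node are assumptions), a caller could look up the closest node that currently has memory:

	/* Closest node to 'nid' among nodes with memory (sketch). */
	int target = nearest_node_nodemask(nid, &node_states[N_MEMORY]);

	if (target == MAX_NUMNODES)
		target = nid;	/* no candidate in the mask, fall back */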
@@ -7,6 +7,13 @@
#ifndef __SCX_COMMON_BPF_H
#define __SCX_COMMON_BPF_H

/*
 * The generated kfunc prototypes in vmlinux.h are missing address space
 * attributes which cause build failures. For now, suppress the generated
 * prototypes. See https://github.com/sched-ext/scx/issues/1111.
 */
#define BPF_NO_KFUNC_PROTOTYPES

#ifdef LSP
#define __bpf__
#include "../vmlinux.h"

@@ -18,6 +25,7 @@
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
#include "user_exit_info.h"
#include "enum_defs.autogen.h"

#define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
#define PF_KTHREAD			0x00200000	/* I am a kernel thread */

@@ -62,21 +70,28 @@ void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
u32 scx_bpf_nr_node_ids(void) __ksym __weak;
u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
int scx_bpf_cpu_node(s32 cpu) __ksym __weak;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;
void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;

/*
 * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from

@@ -84,6 +99,9 @@ u64 scx_bpf_now(void) __ksym __weak;
 */
#define BPF_FOR_EACH_ITER	(&___it)

#define scx_read_event(e, name)							\
	(bpf_core_field_exists((e)->name) ? (e)->name : 0)

static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}

@@ -584,6 +602,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
	__u.__val;							\
})

#define READ_ONCE_ARENA(type, x)					\
({									\
	union { type __val; char __c[1]; } __u =			\
		{ .__c = { 0 } };					\
	__read_once_size((void *)&(x), __u.__c, sizeof(x));		\
	__u.__val;							\
})

#define WRITE_ONCE_ARENA(type, x, val)					\
({									\
	union { type __val; char __c[1]; } __u =			\
		{ .__val = (val) };					\
	__write_once_size((void *)&(x), __u.__c, sizeof(x));		\
	__u.__val;							\
})

/*
 * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
 * @v: The value for which we're computing the base 2 logarithm.
@@ -16,6 +16,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include "enum_defs.autogen.h"

typedef uint8_t u8;
typedef uint16_t u16;
@@ -125,11 +125,106 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
	 false;								\
})

/**
 * __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
 * in a compatible way. We will preserve this __COMPAT helper until v6.16.
 *
 * @enq_flags: enqueue flags from ops.enqueue()
 *
 * Return: True if SCX_ENQ_CPU_SELECTED is turned on in @enq_flags
 */
static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
{
#ifdef HAVE_SCX_ENQ_CPU_SELECTED
	/*
	 * This is the case that a BPF code compiled against vmlinux.h
	 * where the enum SCX_ENQ_CPU_SELECTED exists.
	 */

	/*
	 * We should temporarily suspend the macro expansion of
	 * 'SCX_ENQ_CPU_SELECTED'. This avoids 'SCX_ENQ_CPU_SELECTED' being
	 * rewritten to '__SCX_ENQ_CPU_SELECTED' when 'SCX_ENQ_CPU_SELECTED'
	 * is defined in 'scripts/gen_enums.py'.
	 */
#pragma push_macro("SCX_ENQ_CPU_SELECTED")
#undef SCX_ENQ_CPU_SELECTED
	u64 flag;

	/*
	 * When the kernel did not have SCX_ENQ_CPU_SELECTED,
	 * select_task_rq_scx() has never been skipped. Thus, this case
	 * should be considered that the CPU has already been selected.
	 */
	if (!bpf_core_enum_value_exists(enum scx_enq_flags,
					SCX_ENQ_CPU_SELECTED))
		return true;

	flag = bpf_core_enum_value(enum scx_enq_flags, SCX_ENQ_CPU_SELECTED);
	return enq_flags & flag;

	/*
	 * Once done, resume the macro expansion of 'SCX_ENQ_CPU_SELECTED'.
	 */
#pragma pop_macro("SCX_ENQ_CPU_SELECTED")
#else
	/*
	 * This is the case that a BPF code compiled against vmlinux.h
	 * where the enum SCX_ENQ_CPU_SELECTED does NOT exist.
	 */
	return true;
#endif /* HAVE_SCX_ENQ_CPU_SELECTED */
}

#define scx_bpf_now()							\
	(bpf_ksym_exists(scx_bpf_now) ?					\
	 scx_bpf_now() :						\
	 bpf_ktime_get_ns())

/*
 * v6.15: Introduce event counters.
 *
 * Preserve the following macro until v6.17.
 */
#define __COMPAT_scx_bpf_events(events, size)				\
	(bpf_ksym_exists(scx_bpf_events) ?				\
	 scx_bpf_events(events, size) : ({}))

/*
 * v6.15: Introduce NUMA-aware kfuncs to operate with per-node idle
 * cpumasks.
 *
 * Preserve the following __COMPAT_scx_*_node macros until v6.17.
 */
#define __COMPAT_scx_bpf_nr_node_ids()					\
	(bpf_ksym_exists(scx_bpf_nr_node_ids) ?				\
	 scx_bpf_nr_node_ids() : 1U)

#define __COMPAT_scx_bpf_cpu_node(cpu)					\
	(bpf_ksym_exists(scx_bpf_cpu_node) ?				\
	 scx_bpf_cpu_node(cpu) : 0)

#define __COMPAT_scx_bpf_get_idle_cpumask_node(node)			\
	(bpf_ksym_exists(scx_bpf_get_idle_cpumask_node) ?		\
	 scx_bpf_get_idle_cpumask_node(node) :				\
	 scx_bpf_get_idle_cpumask())					\

#define __COMPAT_scx_bpf_get_idle_smtmask_node(node)			\
	(bpf_ksym_exists(scx_bpf_get_idle_smtmask_node) ?		\
	 scx_bpf_get_idle_smtmask_node(node) :				\
	 scx_bpf_get_idle_smtmask())

#define __COMPAT_scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags)	\
	(bpf_ksym_exists(scx_bpf_pick_idle_cpu_node) ?			\
	 scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) :	\
	 scx_bpf_pick_idle_cpu(cpus_allowed, flags))

#define __COMPAT_scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags)	\
	(bpf_ksym_exists(scx_bpf_pick_any_cpu_node) ?			\
	 scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) :		\
	 scx_bpf_pick_any_cpu(cpus_allowed, flags))

/*
 * Define sched_ext_ops. This may be expanded to define multiple variants for
 * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
@@ -106,8 +106,20 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
 	return false;
 }
 
-#define SCX_OPS_SWITCH_PARTIAL \
-	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
+#define SCX_OPS_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_ops_flags", #name)
+
+#define SCX_OPS_KEEP_BUILTIN_IDLE	SCX_OPS_FLAG(SCX_OPS_KEEP_BUILTIN_IDLE)
+#define SCX_OPS_ENQ_LAST		SCX_OPS_FLAG(SCX_OPS_ENQ_LAST)
+#define SCX_OPS_ENQ_EXITING		SCX_OPS_FLAG(SCX_OPS_ENQ_EXITING)
+#define SCX_OPS_SWITCH_PARTIAL		SCX_OPS_FLAG(SCX_OPS_SWITCH_PARTIAL)
+#define SCX_OPS_ENQ_MIGRATION_DISABLED	SCX_OPS_FLAG(SCX_OPS_ENQ_MIGRATION_DISABLED)
+#define SCX_OPS_ALLOW_QUEUED_WAKEUP	SCX_OPS_FLAG(SCX_OPS_ALLOW_QUEUED_WAKEUP)
+#define SCX_OPS_BUILTIN_IDLE_PER_NODE	SCX_OPS_FLAG(SCX_OPS_BUILTIN_IDLE_PER_NODE)
+
+#define SCX_PICK_IDLE_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_pick_idle_cpu_flags", #name)
+
+#define SCX_PICK_IDLE_CORE		SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_CORE)
+#define SCX_PICK_IDLE_IN_NODE		SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_IN_NODE)
 
 static inline long scx_hotplug_seq(void)
 {
tools/sched_ext/include/scx/enum_defs.autogen.h: new file, 120 lines
@@ -0,0 +1,120 @@
/*
 * WARNING: This file is autogenerated from gen_enum_defs.py [1].
 *
 * [1] https://github.com/sched-ext/scx/blob/main/scripts/gen_enum_defs.py
 */

#ifndef __ENUM_DEFS_AUTOGEN_H__
#define __ENUM_DEFS_AUTOGEN_H__

#define HAVE_SCX_DSP_DFL_MAX_BATCH
#define HAVE_SCX_DSP_MAX_LOOPS
#define HAVE_SCX_WATCHDOG_MAX_TIMEOUT
#define HAVE_SCX_EXIT_BT_LEN
#define HAVE_SCX_EXIT_MSG_LEN
#define HAVE_SCX_EXIT_DUMP_DFL_LEN
#define HAVE_SCX_CPUPERF_ONE
#define HAVE_SCX_OPS_TASK_ITER_BATCH
#define HAVE_SCX_CPU_PREEMPT_RT
#define HAVE_SCX_CPU_PREEMPT_DL
#define HAVE_SCX_CPU_PREEMPT_STOP
#define HAVE_SCX_CPU_PREEMPT_UNKNOWN
#define HAVE_SCX_DEQ_SLEEP
#define HAVE_SCX_DEQ_CORE_SCHED_EXEC
#define HAVE_SCX_DSQ_FLAG_BUILTIN
#define HAVE_SCX_DSQ_FLAG_LOCAL_ON
#define HAVE_SCX_DSQ_INVALID
#define HAVE_SCX_DSQ_GLOBAL
#define HAVE_SCX_DSQ_LOCAL
#define HAVE_SCX_DSQ_LOCAL_ON
#define HAVE_SCX_DSQ_LOCAL_CPU_MASK
#define HAVE_SCX_DSQ_ITER_REV
#define HAVE___SCX_DSQ_ITER_HAS_SLICE
#define HAVE___SCX_DSQ_ITER_HAS_VTIME
#define HAVE___SCX_DSQ_ITER_USER_FLAGS
#define HAVE___SCX_DSQ_ITER_ALL_FLAGS
#define HAVE_SCX_DSQ_LNODE_ITER_CURSOR
#define HAVE___SCX_DSQ_LNODE_PRIV_SHIFT
#define HAVE_SCX_ENQ_WAKEUP
#define HAVE_SCX_ENQ_HEAD
#define HAVE_SCX_ENQ_CPU_SELECTED
#define HAVE_SCX_ENQ_PREEMPT
#define HAVE_SCX_ENQ_REENQ
#define HAVE_SCX_ENQ_LAST
#define HAVE___SCX_ENQ_INTERNAL_MASK
#define HAVE_SCX_ENQ_CLEAR_OPSS
#define HAVE_SCX_ENQ_DSQ_PRIQ
#define HAVE_SCX_TASK_DSQ_ON_PRIQ
#define HAVE_SCX_TASK_QUEUED
#define HAVE_SCX_TASK_RESET_RUNNABLE_AT
#define HAVE_SCX_TASK_DEQD_FOR_SLEEP
#define HAVE_SCX_TASK_STATE_SHIFT
#define HAVE_SCX_TASK_STATE_BITS
#define HAVE_SCX_TASK_STATE_MASK
#define HAVE_SCX_TASK_CURSOR
#define HAVE_SCX_ECODE_RSN_HOTPLUG
#define HAVE_SCX_ECODE_ACT_RESTART
#define HAVE_SCX_EXIT_NONE
#define HAVE_SCX_EXIT_DONE
#define HAVE_SCX_EXIT_UNREG
#define HAVE_SCX_EXIT_UNREG_BPF
#define HAVE_SCX_EXIT_UNREG_KERN
#define HAVE_SCX_EXIT_SYSRQ
#define HAVE_SCX_EXIT_ERROR
#define HAVE_SCX_EXIT_ERROR_BPF
#define HAVE_SCX_EXIT_ERROR_STALL
#define HAVE_SCX_KF_UNLOCKED
#define HAVE_SCX_KF_CPU_RELEASE
#define HAVE_SCX_KF_DISPATCH
#define HAVE_SCX_KF_ENQUEUE
#define HAVE_SCX_KF_SELECT_CPU
#define HAVE_SCX_KF_REST
#define HAVE___SCX_KF_RQ_LOCKED
#define HAVE___SCX_KF_TERMINAL
#define HAVE_SCX_KICK_IDLE
#define HAVE_SCX_KICK_PREEMPT
#define HAVE_SCX_KICK_WAIT
#define HAVE_SCX_OPI_BEGIN
#define HAVE_SCX_OPI_NORMAL_BEGIN
#define HAVE_SCX_OPI_NORMAL_END
#define HAVE_SCX_OPI_CPU_HOTPLUG_BEGIN
#define HAVE_SCX_OPI_CPU_HOTPLUG_END
#define HAVE_SCX_OPI_END
#define HAVE_SCX_OPS_ENABLING
#define HAVE_SCX_OPS_ENABLED
#define HAVE_SCX_OPS_DISABLING
#define HAVE_SCX_OPS_DISABLED
#define HAVE_SCX_OPS_KEEP_BUILTIN_IDLE
#define HAVE_SCX_OPS_ENQ_LAST
#define HAVE_SCX_OPS_ENQ_EXITING
#define HAVE_SCX_OPS_SWITCH_PARTIAL
#define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
#define HAVE_SCX_OPS_ALL_FLAGS
#define HAVE_SCX_OPSS_NONE
#define HAVE_SCX_OPSS_QUEUEING
#define HAVE_SCX_OPSS_QUEUED
#define HAVE_SCX_OPSS_DISPATCHING
#define HAVE_SCX_OPSS_QSEQ_SHIFT
#define HAVE_SCX_PICK_IDLE_CORE
#define HAVE_SCX_OPS_NAME_LEN
#define HAVE_SCX_SLICE_DFL
#define HAVE_SCX_SLICE_INF
#define HAVE_SCX_RQ_ONLINE
#define HAVE_SCX_RQ_CAN_STOP_TICK
#define HAVE_SCX_RQ_BAL_PENDING
#define HAVE_SCX_RQ_BAL_KEEP
#define HAVE_SCX_RQ_BYPASSING
#define HAVE_SCX_RQ_IN_WAKEUP
#define HAVE_SCX_RQ_IN_BALANCE
#define HAVE_SCX_TASK_NONE
#define HAVE_SCX_TASK_INIT
#define HAVE_SCX_TASK_READY
#define HAVE_SCX_TASK_ENABLED
#define HAVE_SCX_TASK_NR_STATES
#define HAVE_SCX_TG_ONLINE
#define HAVE_SCX_TG_INITED
#define HAVE_SCX_WAKE_FORK
#define HAVE_SCX_WAKE_TTWU
#define HAVE_SCX_WAKE_SYNC

#endif /* __ENUM_DEFS_AUTOGEN_H__ */
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <signal.h>
+#include <assert.h>
 #include <libgen.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
@@ -60,14 +61,22 @@ int main(int argc, char **argv)
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
 	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
+	assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
+
 	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
 			break;
-		case 'c':
-			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
+		case 'c': {
+			u32 central_cpu = strtoul(optarg, NULL, 0);
+			if (central_cpu >= skel->rodata->nr_cpu_ids) {
+				fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+				return -1;
+			}
+			skel->rodata->central_cpu = (s32)central_cpu;
 			break;
+		}
 		case 'v':
 			verbose = true;
 			break;
@@ -96,7 +105,7 @@ int main(int argc, char **argv)
 	 */
 	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
 	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
-	CPU_ZERO(cpuset);
+	CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
 	CPU_SET(skel->rodata->central_cpu, cpuset);
 	SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
 		   "Failed to affinitize to central CPU %d (max %d)",
@@ -231,7 +231,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	}
 
 	/* if select_cpu() wasn't called, try direct dispatch */
-	if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
+	if (!__COMPAT_is_enq_cpu_selected(enq_flags) &&
 	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
 		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
 		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
@@ -763,6 +763,8 @@ static void dump_shared_dsq(void)
 
 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
+	struct scx_event_stats events;
+
 	bpf_rcu_read_lock();
 	dispatch_highpri(true);
 	bpf_rcu_read_unlock();
@@ -772,6 +774,25 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 	if (print_shared_dsq)
 		dump_shared_dsq();
 
+	__COMPAT_scx_bpf_events(&events, sizeof(events));
+
+	bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+		   scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+		   scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+		   scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+		   scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
+		   scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+		   scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+		   scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+		   scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+
 	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
 	return 0;
 }
@@ -172,6 +172,7 @@ auto-test-targets :=			\
	maximal				\
	maybe_null			\
	minimal				\
	numa				\
	prog_run			\
	reload_loop			\
	select_cpu_dfl			\
tools/testing/selftests/sched_ext/numa.bpf.c: new file, 100 lines
@@ -0,0 +1,100 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migration can only
 * occurs between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
						  __COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
							 __COMPAT_SCX_PICK_IDLE_IN_NODE);

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	scx_bpf_dsq_move_to_local(node);
}

s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu		= (void *)numa_select_cpu,
	.enqueue		= (void *)numa_enqueue,
	.dispatch		= (void *)numa_dispatch,
	.init			= (void *)numa_init,
	.exit			= (void *)numa_exit,
	.name			= "numa",
};
tools/testing/selftests/sched_ext/numa.c: new file, 59 lines
@@ -0,0 +1,59 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */
#include <bpf/bpf.h>
#include <scx/common.h>
#include <sys/wait.h>
#include <unistd.h>
#include "numa.bpf.skel.h"
#include "scx_test.h"

static enum scx_test_status setup(void **ctx)
{
	struct numa *skel;

	skel = numa__open();
	SCX_FAIL_IF(!skel, "Failed to open");
	SCX_ENUM_INIT(skel);
	skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE;
	skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE;
	SCX_FAIL_IF(numa__load(skel), "Failed to load skel");

	*ctx = skel;

	return SCX_TEST_PASS;
}

static enum scx_test_status run(void *ctx)
{
	struct numa *skel = ctx;
	struct bpf_link *link;

	link = bpf_map__attach_struct_ops(skel->maps.numa_ops);
	SCX_FAIL_IF(!link, "Failed to attach scheduler");

	/* Just sleeping is fine, plenty of scheduling events happening */
	sleep(1);

	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
	bpf_link__destroy(link);

	return SCX_TEST_PASS;
}

static void cleanup(void *ctx)
{
	struct numa *skel = ctx;

	numa__destroy(skel);
}

struct scx_test numa = {
	.name = "numa",
	.description = "Verify NUMA-aware functionalities",
	.setup = setup,
	.run = run,
	.cleanup = cleanup,
};
REGISTER_SCX_TEST(&numa)