mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 02:01:18 -04:00
Merge tag 'cgroup-for-7.1-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
- cpuset fixes:
  - Partition invalidation could return CPUs still in use by sibling
    partitions, producing overlapping effective_cpus
  - cpuset_can_attach() over-reserved DL bandwidth on moves that
    stayed within the same root domain
  - Pending DL migration state leaked into later attaches when a
    later can_attach() check failed
  - Reorder PF_EXITING and __GFP_HARDWALL checks so dying tasks can
    allocate from any node and exit quickly
- dmem: propagate -ENOMEM instead of spinning forever when the fallback
  pool allocation also fails
- selftests/cgroup: percpu test error-path leak, bogus numeric
  comparison of cpuset strings, and a zero-length read() that silently
  passed OOM-kill tests
* tag 'cgroup-for-7.1-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Return only actually allocated CPUs during partition invalidation
  selftests/cgroup: Fix error path leaks in test_percpu_basic
  cgroup/cpuset: Reserve DL bandwidth only for root-domain moves
  cgroup/cpuset: Reset DL migration state on can_attach() failure
  selftests/cgroup: Fix string comparison in write_test
  selftests/cgroup: Fix cg_read_strcmp() empty string comparison
  cgroup/dmem: Return -ENOMEM on failed pool preallocation
  cgroup/cpuset: move PF_EXITING check before __GFP_HARDWALL in cpuset_current_node_allowed()
This commit is contained in:
@@ -33,6 +33,15 @@ struct root_domain;
|
||||
extern void dl_add_task_root_domain(struct task_struct *p);
|
||||
extern void dl_clear_root_domain(struct root_domain *rd);
|
||||
extern void dl_clear_root_domain_cpu(int cpu);
|
||||
/*
|
||||
* Return whether moving DL task @p to @new_mask requires moving DL
|
||||
* bandwidth accounting between root domains. This helper is specific to
|
||||
* DL bandwidth move accounting semantics and is shared by
|
||||
* cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the
|
||||
* same source root-domain test.
|
||||
*/
|
||||
extern bool dl_task_needs_bw_move(struct task_struct *p,
|
||||
const struct cpumask *new_mask);
|
||||
|
||||
extern u64 dl_cookie;
|
||||
extern bool dl_bw_visited(int cpu, u64 cookie);
|
||||
|
||||
@@ -167,6 +167,7 @@ struct cpuset {
|
||||
*/
|
||||
int nr_deadline_tasks;
|
||||
int nr_migrate_dl_tasks;
|
||||
/* DL bandwidth that needs destination reservation for this attach. */
|
||||
u64 sum_migrate_dl_bw;
|
||||
/*
|
||||
* CPU used for temporary DL bandwidth allocation during attach;
|
||||
|
||||
@@ -1718,7 +1718,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
|
||||
*/
|
||||
if (is_partition_valid(parent))
|
||||
adding = cpumask_and(tmp->addmask,
|
||||
xcpus, parent->effective_xcpus);
|
||||
cs->effective_xcpus,
|
||||
parent->effective_xcpus);
|
||||
if (old_prs > 0)
|
||||
new_prs = -old_prs;
|
||||
|
||||
@@ -2993,7 +2994,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
||||
struct cpuset *cs, *oldcs;
|
||||
struct task_struct *task;
|
||||
bool setsched_check;
|
||||
int ret;
|
||||
int cpu, ret;
|
||||
|
||||
/* used later by cpuset_attach() */
|
||||
cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
|
||||
@@ -3038,39 +3039,42 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
||||
}
|
||||
|
||||
if (dl_task(task)) {
|
||||
/*
|
||||
* Count all migrating DL tasks for cpuset task accounting.
|
||||
* Only tasks that need a root-domain bandwidth move
|
||||
* contribute to sum_migrate_dl_bw.
|
||||
*/
|
||||
cs->nr_migrate_dl_tasks++;
|
||||
cs->sum_migrate_dl_bw += task->dl.dl_bw;
|
||||
if (dl_task_needs_bw_move(task, cs->effective_cpus))
|
||||
cs->sum_migrate_dl_bw += task->dl.dl_bw;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cs->nr_migrate_dl_tasks)
|
||||
if (!cs->sum_migrate_dl_bw)
|
||||
goto out_success;
|
||||
|
||||
if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
|
||||
int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
|
||||
|
||||
if (unlikely(cpu >= nr_cpu_ids)) {
|
||||
reset_migrate_dl_data(cs);
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
|
||||
if (ret) {
|
||||
reset_migrate_dl_data(cs);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
cs->dl_bw_cpu = cpu;
|
||||
cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
|
||||
if (unlikely(cpu >= nr_cpu_ids)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
cs->dl_bw_cpu = cpu;
|
||||
|
||||
out_success:
|
||||
/*
|
||||
* Mark attach is in progress. This makes validate_change() fail
|
||||
* changes which zero cpus/mems_allowed.
|
||||
*/
|
||||
cs->attach_in_progress++;
|
||||
|
||||
out_unlock:
|
||||
if (ret)
|
||||
reset_migrate_dl_data(cs);
|
||||
mutex_unlock(&cpuset_mutex);
|
||||
return ret;
|
||||
}
|
||||
@@ -4176,11 +4180,11 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
|
||||
* current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this
|
||||
* node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
|
||||
* yes. If current has access to memory reserves as an oom victim, yes.
|
||||
* Otherwise, no.
|
||||
* If the current task is PF_EXITING, yes. Otherwise, no.
|
||||
*
|
||||
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
|
||||
* and do not allow allocations outside the current tasks cpuset
|
||||
* unless the task has been OOM killed.
|
||||
* unless the task has been OOM killed or is exiting.
|
||||
* GFP_KERNEL allocations are not so marked, so can escape to the
|
||||
* nearest enclosing hardwalled ancestor cpuset.
|
||||
*
|
||||
@@ -4194,7 +4198,9 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
|
||||
* The first call here from mm/page_alloc:get_page_from_freelist()
|
||||
* has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
|
||||
* so no allocation on a node outside the cpuset is allowed (unless
|
||||
* in interrupt, of course).
|
||||
* in interrupt, of course). The PF_EXITING check must therefore
|
||||
* come before the __GFP_HARDWALL check, otherwise a dying task
|
||||
* would be blocked on the fast path.
|
||||
*
|
||||
* The second pass through get_page_from_freelist() doesn't even call
|
||||
* here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
|
||||
@@ -4204,6 +4210,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
|
||||
* in_interrupt - any node ok (current task context irrelevant)
|
||||
* GFP_ATOMIC - any node ok
|
||||
* tsk_is_oom_victim - any node ok
|
||||
* PF_EXITING - any node ok (let dying task exit quickly)
|
||||
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
|
||||
* GFP_USER - only nodes in current tasks mems allowed ok.
|
||||
*/
|
||||
@@ -4223,11 +4230,10 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask)
|
||||
*/
|
||||
if (unlikely(tsk_is_oom_victim(current)))
|
||||
return true;
|
||||
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
|
||||
return false;
|
||||
|
||||
if (current->flags & PF_EXITING) /* Let dying task have memory */
|
||||
return true;
|
||||
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
|
||||
return false;
|
||||
|
||||
/* Not hardwall and node outside mems_allowed: scan up cpusets */
|
||||
spin_lock_irqsave(&callback_lock, flags);
|
||||
|
||||
@@ -602,6 +602,7 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
|
||||
pool = NULL;
|
||||
continue;
|
||||
}
|
||||
pool = ERR_PTR(-ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3107,20 +3107,18 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
|
||||
static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
struct affinity_context *ctx)
|
||||
{
|
||||
struct root_domain *src_rd;
|
||||
struct rq *rq;
|
||||
|
||||
WARN_ON_ONCE(!dl_task(p));
|
||||
|
||||
rq = task_rq(p);
|
||||
src_rd = rq->rd;
|
||||
/*
|
||||
* Migrating a SCHED_DEADLINE task between exclusive
|
||||
* cpusets (different root_domains) entails a bandwidth
|
||||
* update. We already made space for us in the destination
|
||||
* domain (see cpuset_can_attach()).
|
||||
*/
|
||||
if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
|
||||
if (dl_task_needs_bw_move(p, ctx->new_mask)) {
|
||||
struct dl_bw *src_dl_b;
|
||||
|
||||
src_dl_b = dl_bw_of(cpu_of(rq));
|
||||
@@ -3137,6 +3135,15 @@ static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
set_cpus_allowed_common(p, ctx);
|
||||
}
|
||||
|
||||
/*
 * Report whether @p is a DL task whose current root-domain span
 * (task_rq(p)->rd->span) does not intersect @new_mask. A task for which
 * dl_task() is false always yields false.
 */
bool dl_task_needs_bw_move(struct task_struct *p,
|
||||
const struct cpumask *new_mask)
|
||||
{
|
||||
if (!dl_task(p))
|
||||
return false;
|
||||
|
||||
return !cpumask_intersects(task_rq(p)->rd->span, new_mask);
|
||||
}
|
||||
|
||||
/* Assumes rq->lock is held */
|
||||
static void rq_online_dl(struct rq *rq)
|
||||
{
|
||||
|
||||
@@ -106,8 +106,9 @@ int cg_read_strcmp(const char *cgroup, const char *control,
|
||||
/* Handle the case of comparing against empty string */
|
||||
if (!expected)
|
||||
return -1;
|
||||
else
|
||||
size = strlen(expected) + 1;
|
||||
|
||||
/* needs size > 1, otherwise cg_read() reads 0 bytes */
|
||||
size = (expected[0] == '\0') ? 2 : strlen(expected) + 1;
|
||||
|
||||
buf = malloc(size);
|
||||
if (!buf)
|
||||
|
||||
@@ -18,7 +18,7 @@ write_test() {
|
||||
echo "testing $interface $value"
|
||||
echo $value > $dir/$interface
|
||||
new=$(cat $dir/$interface)
|
||||
[[ $value -ne $(cat $dir/$interface) ]] && {
|
||||
[[ "$value" != "$new" ]] && {
|
||||
echo "$interface write $value failed: new:$new"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -368,11 +368,15 @@ static int test_percpu_basic(const char *root)
|
||||
|
||||
for (i = 0; i < 1000; i++) {
|
||||
child = cg_name_indexed(parent, "child", i);
|
||||
if (!child)
|
||||
return -1;
|
||||
|
||||
if (cg_create(child))
|
||||
if (!child) {
|
||||
ret = -1;
|
||||
goto cleanup_children;
|
||||
}
|
||||
|
||||
if (cg_create(child)) {
|
||||
free(child);
|
||||
goto cleanup_children;
|
||||
}
|
||||
|
||||
free(child);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user