Merge branch 'v6.3-rc7'
Sync with the urgent patches; in particular:
a53ce18cac ("sched/fair: Sanitize vruntime of entity being migrated")
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
@@ -972,7 +972,7 @@ static int __init bpf_jit_charge_init(void)
{
/* Only used as heuristic here to derive limit. */
bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
PAGE_SIZE), LONG_MAX);
return 0;
}

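The two versions of the assignment above differ only in whether the default JIT budget is a quarter (>> 2) or a half (>> 1) of bpf_jit_limit_max. A minimal userspace sketch of that arithmetic, assuming a 4 KiB page size and a purely illustrative 1 GiB stand-in for bpf_jit_alloc_exec_limit() (the real value is architecture dependent):

/* Sketch only: mirrors the round_up/min_t arithmetic above with made-up inputs. */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define PAGE_SZ 4096ULL

static uint64_t round_up_page(uint64_t v)
{
	return (v + PAGE_SZ - 1) & ~(PAGE_SZ - 1);
}

int main(void)
{
	uint64_t limit_max = 1ULL << 30;                     /* pretend exec limit: 1 GiB */
	uint64_t quarter = round_up_page(limit_max >> 2);    /* the ">> 2" variant */
	uint64_t half = round_up_page(limit_max >> 1);       /* the ">> 1" variant */

	if (quarter > LONG_MAX)
		quarter = LONG_MAX;
	if (half > LONG_MAX)
		half = LONG_MAX;

	printf(">> 2: %llu MiB, >> 1: %llu MiB\n",
	       (unsigned long long)(quarter >> 20),
	       (unsigned long long)(half >> 20));
	return 0;
}

With that stand-in the budget works out to 256 MiB versus 512 MiB; the LONG_MAX clamp only matters on configurations where the computed limit would not fit in a long.
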
@@ -3826,6 +3826,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_MISC)
continue;
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -3863,6 +3865,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_ZERO)
continue;
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -5754,7 +5758,8 @@ static int check_stack_range_initialized(
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
if (*stype == STACK_ZERO) {
if ((*stype == STACK_ZERO) ||
(*stype == STACK_INVALID && env->allow_uninit_stack)) {
if (clobber) {
/* helper can write anything into the stack */
*stype = STACK_MISC;
@@ -13936,6 +13941,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;

if (env->allow_uninit_stack &&
old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
continue;

/* explored stack has more populated slots than current stack
* and these slots were used
*/

@@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
spin_unlock_irq(&callback_lock);

if (adding || deleting)
update_tasks_cpumask(parent, tmp->new_cpus);
update_tasks_cpumask(parent, tmp->addmask);

/*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
@@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
/*
* Use the cpumasks in trialcs for tmpmasks when they are pointers
* to allocated cpumasks.
*
* Note that update_parent_subparts_cpumask() uses only addmask &
* delmask, but not new_cpus.
*/
tmp.addmask = trialcs->subparts_cpus;
tmp.delmask = trialcs->effective_cpus;
tmp.new_cpus = trialcs->cpus_allowed;
tmp.new_cpus = NULL;
#endif

retval = validate_change(cs, trialcs);
@@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
}
spin_unlock_irq(&callback_lock);

#ifdef CONFIG_CPUMASK_OFFSTACK
/* Now trialcs->cpus_allowed is available */
tmp.new_cpus = trialcs->cpus_allowed;
#endif

/* effective_cpus will be updated here */
update_cpumasks_hier(cs, &tmp, false);

@@ -2445,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)

static struct cpuset *cpuset_attach_old_cs;

/*
* Check to see if a cpuset can accept a new task
* For v1, cpus_allowed and mems_allowed can't be empty.
* For v2, effective_cpus can't be empty.
* Note that in v1, effective_cpus = cpus_allowed.
*/
static int cpuset_can_attach_check(struct cpuset *cs)
{
if (cpumask_empty(cs->effective_cpus) ||
(!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
return -ENOSPC;
return 0;
}

/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2459,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)

percpu_down_write(&cpuset_rwsem);

/* allow moving tasks into an empty cpuset if on default hierarchy */
ret = -ENOSPC;
if (!is_in_v2_mode() &&
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;

/*
* Task cannot be moved to a cpuset with empty effective cpus.
*/
if (cpumask_empty(cs->effective_cpus))
/* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs);
if (ret)
goto out_unlock;

cgroup_taskset_for_each(task, css, tset) {
@@ -2485,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -2494,25 +2508,47 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct cpuset *cs;

cgroup_taskset_first(tset, &css);
cs = css_cs(css);

percpu_down_write(&cpuset_rwsem);
css_cs(css)->attach_in_progress--;
cs->attach_in_progress--;
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}

/*
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
static nodemask_t cpuset_attach_nodemask_to;

static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
{
percpu_rwsem_assert_held(&cpuset_rwsem);

if (cs != &top_cpuset)
guarantee_online_cpus(task, cpus_attach);
else
cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
cs->subparts_cpus);
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));

cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flags(cs, task);
}

static void cpuset_attach(struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_rwsem */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2543,20 +2579,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)

guarantee_online_mems(cs, &cpuset_attach_nodemask_to);

cgroup_taskset_for_each(task, css, tset) {
if (cs != &top_cpuset)
guarantee_online_cpus(task, cpus_attach);
else
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));

cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flags(cs, task);
}
cgroup_taskset_for_each(task, css, tset)
cpuset_attach_task(cs, task);

/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3247,6 +3271,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
percpu_up_write(&cpuset_rwsem);
}

/*
* In case the child is cloned into a cpuset different from its parent,
* additional checks are done to see if the move is allowed.
*/
static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
{
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
bool same_cs;
int ret;

rcu_read_lock();
same_cs = (cs == task_cs(current));
rcu_read_unlock();

if (same_cs)
return 0;

lockdep_assert_held(&cgroup_mutex);
percpu_down_write(&cpuset_rwsem);

/* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs);
if (ret)
goto out_unlock;

ret = task_can_attach(task, cs->effective_cpus);
if (ret)
goto out_unlock;

ret = security_task_setscheduler(task);
if (ret)
goto out_unlock;

/*
* Mark attach is in progress. This makes validate_change() fail
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
}

static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
{
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
bool same_cs;

rcu_read_lock();
same_cs = (cs == task_cs(current));
rcu_read_unlock();

if (same_cs)
return;

percpu_down_write(&cpuset_rwsem);
cs->attach_in_progress--;
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}

/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
@@ -3254,11 +3340,33 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
*/
static void cpuset_fork(struct task_struct *task)
{
if (task_css_is_root(task, cpuset_cgrp_id))
return;
struct cpuset *cs;
bool same_cs;

set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
rcu_read_lock();
cs = task_cs(task);
same_cs = (cs == task_cs(current));
rcu_read_unlock();

if (same_cs) {
if (cs == &top_cpuset)
return;

set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
return;
}

/* CLONE_INTO_CGROUP */
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task);

cs->attach_in_progress--;
if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq);

percpu_up_write(&cpuset_rwsem);
}

struct cgroup_subsys cpuset_cgrp_subsys = {
@@ -3271,6 +3379,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.can_fork = cpuset_can_fork,
.cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,

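The new can_fork/cancel_fork hooks cover the case where a task never goes through the attach path at all but is created directly inside another cpuset. A hedged userspace sketch of that path, using clone3() with CLONE_INTO_CGROUP; the /sys/fs/cgroup/test target is purely illustrative and error handling is minimal:

/* Sketch: spawn a child directly into another cgroup, so the child's cpuset
 * can differ from the parent's. This is the situation cpuset_can_fork()
 * now has to validate. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/sched.h>        /* struct clone_args, CLONE_INTO_CGROUP */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int cgfd = open("/sys/fs/cgroup/test", O_RDONLY | O_DIRECTORY);
	struct clone_args args;
	long pid;

	if (cgfd < 0) {
		perror("open cgroup");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.flags = CLONE_INTO_CGROUP;
	args.exit_signal = SIGCHLD;
	args.cgroup = cgfd;

	pid = syscall(SYS_clone3, &args, sizeof(args));
	if (pid == 0)
		_exit(0);       /* child starts life in the target cgroup */
	if (pid > 0)
		waitpid(pid, NULL, 0);
	close(cgfd);
	return 0;
}

With the hook in place, a target cpuset that fails cpuset_can_attach_check() (no effective CPUs, or empty cpus/mems on v1) is expected to make such a clone3() call fail rather than succeed without the attach-time checks.
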
@@ -22,6 +22,7 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/cpu.h>

/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,

if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
static_branch_inc(&freezer_active);
static_branch_inc_cpuslocked(&freezer_active);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
@@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (!(freezer->state & CGROUP_FREEZING)) {
freezer->state &= ~CGROUP_FROZEN;
if (was_freezing)
static_branch_dec(&freezer_active);
static_branch_dec_cpuslocked(&freezer_active);
unfreeze_cgroup(freezer);
}
}
@@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;

cpus_read_lock();
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
@@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
cpus_read_unlock();
}

static ssize_t freezer_write(struct kernfs_open_file *of,

@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
struct task_cputime *cputime = &bstat->cputime;
int i;

cputime->stime = 0;
cputime->utime = 0;
cputime->sum_exec_runtime = 0;
memset(bstat, 0, sizeof(*bstat));
for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;
u64 *cpustat = kcpustat.cpustat;

@@ -623,10 +623,10 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask =
dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
dma_get_min_align_mask(dev) | alloc_align_mask;
unsigned int nslots = nr_slots(alloc_size), stride;
unsigned int index, wrap, count = 0, i;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
unsigned int slot_index;
@@ -634,30 +634,35 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
BUG_ON(!nslots);
BUG_ON(area_index >= mem->nareas);

/*
* For allocations of PAGE_SIZE or larger only look for page aligned
* allocations.
*/
if (alloc_size >= PAGE_SIZE)
iotlb_align_mask |= ~PAGE_MASK;
iotlb_align_mask &= ~(IO_TLB_SIZE - 1);

/*
* For mappings with an alignment requirement don't bother looping to
* unaligned slots once we found an aligned one. For allocations of
* PAGE_SIZE or larger only look for page aligned allocations.
* unaligned slots once we found an aligned one.
*/
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);

spin_lock_irqsave(&area->lock, flags);
if (unlikely(nslots > mem->area_nslabs - area->used))
goto not_found;

slot_base = area_index * mem->area_nslabs;
index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
index = area->index;

do {
for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
slot_index = slot_base + index;

if (orig_addr &&
(slot_addr(tbl_dma_addr, slot_index) &
iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
index = wrap_area_index(mem, index + 1);
slots_checked++;
continue;
}

@@ -673,7 +678,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
goto found;
}
index = wrap_area_index(mem, index + stride);
} while (index != wrap);
slots_checked += stride;
}

not_found:
spin_unlock_irqrestore(&area->lock, flags);
@@ -693,10 +699,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
/*
* Update the indices to avoid searching in the next round.
*/
if (index + nslots < mem->area_nslabs)
area->index = index + nslots;
else
area->index = 0;
area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
return slot_index;

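Of the two stride computations visible in this hunk, one derives the stride purely from iotlb_align_mask after folding in alloc_align_mask and, for large requests, the page mask. A standalone sketch of just that mask arithmetic; the constants assume the usual IO_TLB_SHIFT of 11 (2 KiB slots), a 4 KiB page, and zero device alignment masks, none of which is taken from a specific device:

/* Sketch of the stride/mask arithmetic only; not the full slot search. */
#include <stdio.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1u << IO_TLB_SHIFT)	/* 2 KiB slots */
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static unsigned int stride_for(unsigned int min_align_mask,
			       unsigned int alloc_align_mask,
			       unsigned int alloc_size)
{
	unsigned int iotlb_align_mask = min_align_mask | alloc_align_mask;

	if (alloc_size >= PAGE_SIZE)
		iotlb_align_mask |= ~PAGE_MASK;	/* page-align large requests */
	iotlb_align_mask &= ~(IO_TLB_SIZE - 1);	/* drop the in-slot bits */

	return (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
}

int main(void)
{
	/* small unaligned mapping: stride 1, every slot is a candidate */
	printf("stride=%u\n", stride_for(0, 0, 512));
	/* >= PAGE_SIZE: only every second 2 KiB slot is page aligned */
	printf("stride=%u\n", stride_for(0, 0, 8192));
	return 0;
}

For a sub-page mapping the stride stays 1, so every slot is examined; once the request reaches PAGE_SIZE only every second 2 KiB slot can be page aligned and the stride becomes 2.
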
@@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
arch_enter_from_user_mode(regs);
lockdep_hardirqs_off(CALLER_ADDR0);

CT_WARN_ON(ct_state() != CONTEXT_USER);
CT_WARN_ON(__ct_state() != CONTEXT_USER);
user_exit_irqoff();

instrumentation_begin();
@@ -192,13 +192,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work = read_thread_flags();
unsigned long ti_work;

lockdep_assert_irqs_disabled();

/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();

ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);

@@ -12173,7 +12173,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
/*
* If its not a per-cpu rb, it must be the same task.
*/
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
if (output_event->cpu == -1 && output_event->hw.target != event->hw.target)
goto out;

/*
@@ -12893,12 +12893,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
__perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events);
__perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events);

/*
* Wait for the events to quiesce before re-instating them.
*/
synchronize_rcu();
if (!list_empty(&events)) {
/*
* Wait for the events to quiesce before re-instating them.
*/
synchronize_rcu();

__perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
__perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
}

mutex_unlock(&dst_ctx->mutex);
mutex_unlock(&src_ctx->mutex);

@@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (retval)
goto out;

mt_clear_in_rcu(vmi.mas.tree);
for_each_vma(old_vmi, mpnt) {
struct file *file;

@@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
retval = arch_dup_mmap(oldmm, mm);
loop_out:
vma_iter_free(&vmi);
if (!retval)
mt_set_in_rcu(vmi.mas.tree);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -755,11 +758,6 @@ static void check_mm(struct mm_struct *mm)
for (i = 0; i < NR_MM_COUNTERS; i++) {
long x = percpu_counter_sum(&mm->rss_stat[i]);

if (likely(!x))
continue;

/* Making sure this is not due to race with CPU offlining. */
x = percpu_counter_sum_all(&mm->rss_stat[i]);
if (unlikely(x))
pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
mm, resident_page_types[i], x);

@@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o
KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o

CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer
CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN)
obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o

@@ -3024,6 +3024,18 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
return !!READ_ONCE(krcp->head);
}

static bool
need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
{
int i;

for (i = 0; i < FREE_N_CHANNELS; i++)
if (!list_empty(&krwp->bulk_head_free[i]))
return true;

return !!krwp->head_free;
}

static int krc_count(struct kfree_rcu_cpu *krcp)
{
int sum = atomic_read(&krcp->head_count);
@@ -3107,15 +3119,14 @@ static void kfree_rcu_monitor(struct work_struct *work)
for (i = 0; i < KFREE_N_BATCHES; i++) {
struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);

// Try to detach bulk_head or head and attach it over any
// available corresponding free channel. It can be that
// a previous RCU batch is in progress, it means that
// immediately to queue another one is not possible so
// in that case the monitor work is rearmed.
if ((!list_empty(&krcp->bulk_head[0]) && list_empty(&krwp->bulk_head_free[0])) ||
(!list_empty(&krcp->bulk_head[1]) && list_empty(&krwp->bulk_head_free[1])) ||
(READ_ONCE(krcp->head) && !krwp->head_free)) {
// Try to detach bulk_head or head and attach it, only when
// all channels are free. Any channel is not free means at krwp
// there is on-going rcu work to handle krwp's free business.
if (need_wait_for_krwp_work(krwp))
continue;

// kvfree_rcu_drain_ready() might handle this krcp, if so give up.
if (need_offload_krc(krcp)) {
// Channel 1 corresponds to the SLAB-pointer bulk path.
// Channel 2 corresponds to vmalloc-pointer bulk path.
for (j = 0; j < FREE_N_CHANNELS; j++) {

@@ -2099,6 +2099,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)

void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_on_rq_migrating(p))
flags |= ENQUEUE_MIGRATED;

enqueue_task(rq, p, flags);

p->on_rq = TASK_ON_RQ_QUEUED;

@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}

static inline bool entity_is_long_sleeper(struct sched_entity *se)
{
struct cfs_rq *cfs_rq;
u64 sleep_time;

if (se->exec_start == 0)
return false;

cfs_rq = cfs_rq_of(se);

sleep_time = rq_clock_task(rq_of(cfs_rq));

/* Happen while migrating because of clock task divergence */
if (sleep_time <= se->exec_start)
return false;

sleep_time -= se->exec_start;
if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
return true;

return false;
}

static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime = cfs_rq->min_vruntime;
u64 sleep_time;

/*
* The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)

/*
* Pull vruntime of the entity being placed to the base level of
* cfs_rq, to prevent boosting it if placed backwards. If the entity
* slept for a long time, don't even try to compare its vruntime with
* the base as it may be too far off and the comparison may get
* inversed due to s64 overflow.
* cfs_rq, to prevent boosting it if placed backwards.
* However, min_vruntime can advance much faster than real time, with
* the extreme being when an entity with the minimal weight always runs
* on the cfs_rq. If the waking entity slept for a long time, its
* vruntime difference from min_vruntime may overflow s64 and their
* comparison may get inversed, so ignore the entity's original
* vruntime in that case.
* The maximal vruntime speedup is given by the ratio of normal to
* minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
* When placing a migrated waking entity, its exec_start has been set
* from a different rq. In order to take into account a possible
* divergence between new and prev rq's clocks task because of irq and
* stolen time, we take an additional margin.
* So, cutting off on the sleep time of
* 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
* should be safe.
*/
sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
if (entity_is_long_sleeper(se))
se->vruntime = vruntime;
else
se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
/* Entity has migrated, no longer consider this task hot */
if (flags & ENQUEUE_MIGRATED)
se->exec_start = 0;

check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7661,9 +7697,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
/* Tell new CPU we are migrated */
se->avg.last_update_time = 0;

/* We have migrated, no longer consider this task hot */
se->exec_start = 0;

update_scan_period(p, new_cpu);
}

@@ -10209,6 +10242,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s

sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity;

/*
* If the local group is more loaded than the average system
* load, don't try to pull any tasks.
*/
if (local->avg_load >= sds->avg_load) {
env->imbalance = 0;
return;
}

}

/*

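The "~104 days" figure quoted in the new comment is easy to sanity-check: the cutoff is 2^63 ns divided by scale_load_down(NICE_0_LOAD), which is 1024 under the common 64-bit configuration (that divisor is an assumption about the config; only the order of magnitude matters for the argument). A tiny standalone check:

/* Sketch: verify the sleep-time cutoff works out to roughly 104 days. */
#include <stdio.h>

int main(void)
{
	unsigned long long cutoff_ns = (1ULL << 63) / 1024;	/* scale_load_down(NICE_0_LOAD) assumed 1024 */
	double days = (double)cutoff_ns / 1e9 / 60 / 60 / 24;

	printf("cutoff: %llu ns ~ %.1f days\n", cutoff_ns, days);
	return 0;
}

This prints roughly 104.2 days, matching the comment, so treating anything above that as a "long sleeper" and resetting its vruntime to the cfs_rq base cannot itself cause an s64 comparison overflow.
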
@@ -5667,12 +5667,15 @@ int modify_ftrace_direct(unsigned long ip,
ret = 0;
}

if (unlikely(ret && new_direct)) {
direct->count++;
list_del_rcu(&new_direct->next);
synchronize_rcu_tasks();
kfree(new_direct);
ftrace_direct_func_count--;
if (ret) {
direct->addr = old_addr;
if (unlikely(new_direct)) {
direct->count++;
list_del_rcu(&new_direct->next);
synchronize_rcu_tasks();
kfree(new_direct);
ftrace_direct_func_count--;
}
}

out_unlock:

@@ -3098,6 +3098,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
/*
* No need for a memory barrier here, as the update
* of the tail_page did it for this page.
*/
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(&cpu_buffer->commit_page);
@@ -3107,6 +3111,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {

/* Make sure the readers see the content of what is committed. */
smp_wmb();
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
RB_WARN_ON(cpu_buffer,
@@ -4684,7 +4690,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)

/*
* Make sure we see any padding after the write update
* (see rb_reset_tail())
* (see rb_reset_tail()).
*
* In addition, a writer may be writing on the reader page
* if the page has not been fully filled, so the read barrier
* is also needed to make sure we see the content of what is
* committed by the writer (see rb_set_commit_to_write()).
*/
smp_rmb();

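The smp_wmb()/smp_rmb() pair added here follows the usual publish/consume pattern: the writer fills the page, issues the write barrier, then publishes the commit index; the reader loads the commit index, issues the read barrier, then reads the page. A minimal, self-contained illustration of that pairing using C11 fences in place of the kernel barriers (the names below are mine, not taken from ring_buffer.c, and a real scenario would run writer and reader on different CPUs):

/* Sketch of the release/acquire pairing the two barriers establish. */
#include <stdatomic.h>
#include <stdio.h>

static int payload;		/* stands in for the page contents */
static atomic_int commit;	/* stands in for the commit index  */

static void writer(void)
{
	payload = 42;					/* write the data   */
	atomic_thread_fence(memory_order_release);	/* ~ smp_wmb()      */
	atomic_store_explicit(&commit, 1, memory_order_relaxed);
}

static void reader(void)
{
	if (atomic_load_explicit(&commit, memory_order_relaxed)) {
		atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
		printf("%d\n", payload);	/* sees the published data */
	}
}

int main(void)
{
	writer();
	reader();
	return 0;
}

Without the read-side fence, seeing the updated commit index would not guarantee seeing the data written before the write-side fence, which is exactly the reader-page case the expanded comment describes.
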
@@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr,
unsigned long flags;

if (in_nmi()) {
internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
internal_trace_puts("*** snapshot is being ignored ***\n");
trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
trace_array_puts(tr, "*** snapshot is being ignored ***\n");
return;
}

if (!tr->allocated_snapshot) {
internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
internal_trace_puts("*** stopping trace here! ***\n");
tracing_off();
trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
trace_array_puts(tr, "*** stopping trace here! ***\n");
tracer_tracing_off(tr);
return;
}

/* Note, snapshot can not be used when the tracer uses it */
if (tracer->use_max_tr) {
internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
return;
}

@@ -9516,6 +9516,7 @@ static int __remove_instance(struct trace_array *tr)
tracefs_remove(tr->dir);
free_percpu(tr->last_func_repeats);
free_trace_buffers(tr);
clear_tracing_err_log(tr);

for (i = 0; i < tr->nr_topts; i++) {
kfree(tr->topts[i].topts);
@@ -10393,19 +10394,20 @@ __init static int tracer_alloc_buffers(void)

void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
struct trace_array *tr;

if (snapshot_at_boot) {
tracing_snapshot();
internal_trace_puts("** Boot snapshot taken **\n");
}
if (!snapshot_at_boot)
return;

list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (tr == &global_trace)
if (!tr->allocated_snapshot)
continue;
trace_array_puts(tr, "** Boot snapshot taken **\n");

tracing_snapshot_instance(tr);
trace_array_puts(tr, "** Boot snapshot taken **\n");
}
#endif
}

void __init early_trace_init(void)

@@ -44,14 +44,21 @@ enum { ERRORS };

static const char *err_text[] = { ERRORS };

static DEFINE_MUTEX(lastcmd_mutex);
static char *last_cmd;

static int errpos(const char *str)
{
if (!str || !last_cmd)
return 0;
int ret = 0;

return err_pos(last_cmd, str);
mutex_lock(&lastcmd_mutex);
if (!str || !last_cmd)
goto out;

ret = err_pos(last_cmd, str);
out:
mutex_unlock(&lastcmd_mutex);
return ret;
}

static void last_cmd_set(const char *str)
@@ -59,18 +66,22 @@ static void last_cmd_set(const char *str)
if (!str)
return;

mutex_lock(&lastcmd_mutex);
kfree(last_cmd);

last_cmd = kstrdup(str, GFP_KERNEL);
mutex_unlock(&lastcmd_mutex);
}

static void synth_err(u8 err_type, u16 err_pos)
{
mutex_lock(&lastcmd_mutex);
if (!last_cmd)
return;
goto out;

tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
err_type, err_pos);
out:
mutex_unlock(&lastcmd_mutex);
}

static int create_synth_event(const char *raw_command);

@@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency)
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
tr = inst->tr;
if (tr->max_latency < latency) {
if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
tr->max_latency = latency;
latency_fsnotify(tr);
}
@@ -1738,6 +1738,8 @@ static int timerlat_main(void *data)

trace_timerlat_sample(&s);

notify_new_max_latency(diff);

timerlat_dump_stack(time_to_us(diff));

tlat->tracing_thread = false;