Merge tag 'wq-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:

 - WQ_PERCPU was added to the remaining alloc_workqueue() users,
   system_wq usage was replaced with system_percpu_wq, and
   system_unbound_wq with system_dfl_wq.

   These are equivalent conversions with no functional changes,
   preparing for switching the default from per-CPU to unbound
   workqueues.
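
   A conversion typically looks like the sketch below (illustrative
   only; the "foo" workqueue and the foo_work/bar_work items are
   made-up names, not taken from the tree):

       /* Before: per-CPU vs. unbound placement left implicit */
       foo_wq = alloc_workqueue("foo", WQ_MEM_RECLAIM, 0);
       queue_work(system_wq, &foo_work);
       queue_work(system_unbound_wq, &bar_work);

       /* After: execution locality is spelled out explicitly */
       foo_wq = alloc_workqueue("foo", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
       queue_work(system_percpu_wq, &foo_work);
       queue_work(system_dfl_wq, &bar_work);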

 - A handshake mechanism was added for canceling BH workers to avoid
   live lock scenarios under PREEMPT_RT.
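
   In rough terms, the BH worker now takes a per-pool lock for the
   duration of a callback batch, and a canceling task waits for a
   running BH work item by acquiring and releasing that same lock
   instead of busy-looping on local_bh_disable()/local_bh_enable().
   The sketch below is only a user-space analogue of the idea; the
   real implementation is the pool->cb_lock code in the
   kernel/workqueue.c hunks further down.

       #include <pthread.h>

       static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;

       /* Worker side: hold the lock while work items run. */
       static void run_bh_work(void (*fn)(void *), void *arg)
       {
               pthread_mutex_lock(&cb_lock);
               fn(arg);                /* the "BH work item" */
               pthread_mutex_unlock(&cb_lock);
       }

       /* Cancel side: a lock/unlock pair blocks until the worker has
        * dropped the lock, i.e. the work item has finished. */
       static void cancel_wait_running(void)
       {
               pthread_mutex_lock(&cb_lock);
               pthread_mutex_unlock(&cb_lock);
       }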

 - Unnecessary rcu_read_lock/unlock() calls were dropped in
   wq_watchdog_timer_fn() and workqueue_congested().

 - Documentation was fixed to resolve texinfodocs warnings.

* tag 'wq-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: fix texinfodocs warning for WQ_* flags reference
  workqueue: WQ_PERCPU added to alloc_workqueue users
  workqueue: replace use of system_wq with system_percpu_wq
  workqueue: replace use of system_unbound_wq with system_dfl_wq
  workqueue: Provide a handshake for canceling BH workers
  workqueue: Remove rcu_read_lock/unlock() in wq_watchdog_timer_fn()
  workqueue: Remove redundant rcu_read_lock/unlock() in workqueue_congested()
Linus Torvalds committed 2025-09-30 09:31:09 -07:00

2 changed files with 69 additions and 43 deletions

include/linux/workqueue.h

@@ -410,7 +410,7 @@ enum wq_flags {
         __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
 
         /* BH wq only allows the following flags */
-        __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI,
+        __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU,
 };
 
 enum wq_consts {
@@ -434,10 +434,10 @@ enum wq_consts {
  * short queue flush time. Don't queue works which can run for too
  * long.
  *
- * system_highpri_wq is similar to system_wq but for work items which
+ * system_highpri_wq is similar to system_percpu_wq but for work items which
  * require WQ_HIGHPRI.
  *
- * system_long_wq is similar to system_wq but may host long running
+ * system_long_wq is similar to system_percpu_wq but may host long running
  * works. Queue flushing might take relatively long.
  *
  * system_dfl_wq is unbound workqueue. Workers are not bound to
@@ -445,13 +445,13 @@ enum wq_consts {
  * executed immediately as long as max_active limit is not reached and
  * resources are available.
  *
- * system_freezable_wq is equivalent to system_wq except that it's
+ * system_freezable_wq is equivalent to system_percpu_wq except that it's
  * freezable.
  *
  * *_power_efficient_wq are inclined towards saving power and converted
  * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
  * they are same as their non-power-efficient counterparts - e.g.
- * system_power_efficient_wq is identical to system_wq if
+ * system_power_efficient_wq is identical to system_percpu_wq if
  * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
  *
  * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
@@ -502,7 +502,7 @@ void workqueue_softirq_dead(unsigned int cpu);
  * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
  * that the sum of per-node max_active's may be larger than @max_active.
  *
- * For detailed information on %WQ_* flags, please refer to
+ * For detailed information on %WQ_\* flags, please refer to
  * Documentation/core-api/workqueue.rst.
  *
  * RETURNS:
@@ -570,7 +570,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
         alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name) \
-        alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
+        alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
 #define create_freezable_workqueue(name) \
         alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
                         WQ_MEM_RECLAIM, 1, (name))
@@ -708,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq,
  */
 static inline bool schedule_work_on(int cpu, struct work_struct *work)
 {
-        return queue_work_on(cpu, system_wq, work);
+        return queue_work_on(cpu, system_percpu_wq, work);
 }
 
 /**
@@ -727,7 +727,7 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
  */
 static inline bool schedule_work(struct work_struct *work)
 {
-        return queue_work(system_wq, work);
+        return queue_work(system_percpu_wq, work);
 }
 
 /**
@@ -770,21 +770,21 @@ extern void __warn_flushing_systemwide_wq(void)
 #define flush_scheduled_work() \
 ({ \
         __warn_flushing_systemwide_wq(); \
-        __flush_workqueue(system_wq); \
+        __flush_workqueue(system_percpu_wq); \
 })
 
 #define flush_workqueue(wq) \
 ({ \
         struct workqueue_struct *_wq = (wq); \
         \
-        if ((__builtin_constant_p(_wq == system_wq) && \
-             _wq == system_wq) || \
+        if ((__builtin_constant_p(_wq == system_percpu_wq) && \
+             _wq == system_percpu_wq) || \
             (__builtin_constant_p(_wq == system_highpri_wq) && \
              _wq == system_highpri_wq) || \
             (__builtin_constant_p(_wq == system_long_wq) && \
              _wq == system_long_wq) || \
-            (__builtin_constant_p(_wq == system_unbound_wq) && \
-             _wq == system_unbound_wq) || \
+            (__builtin_constant_p(_wq == system_dfl_wq) && \
+             _wq == system_dfl_wq) || \
             (__builtin_constant_p(_wq == system_freezable_wq) && \
              _wq == system_freezable_wq) || \
             (__builtin_constant_p(_wq == system_power_efficient_wq) && \
@@ -807,7 +807,7 @@ extern void __warn_flushing_systemwide_wq(void)
 static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
                                             unsigned long delay)
 {
-        return queue_delayed_work_on(cpu, system_wq, dwork, delay);
+        return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay);
 }
 
 /**
@@ -821,7 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
 static inline bool schedule_delayed_work(struct delayed_work *dwork,
                                          unsigned long delay)
 {
-        return queue_delayed_work(system_wq, dwork, delay);
+        return queue_delayed_work(system_percpu_wq, dwork, delay);
 }
 
 #ifndef CONFIG_SMP

kernel/workqueue.c

@@ -222,7 +222,9 @@ struct worker_pool {
         struct workqueue_attrs *attrs; /* I: worker attributes */
         struct hlist_node hash_node; /* PL: unbound_pool_hash node */
         int refcnt; /* PL: refcnt for unbound pools */
+#ifdef CONFIG_PREEMPT_RT
+        spinlock_t cb_lock; /* BH worker cancel lock */
+#endif
 
         /*
          * Destruction of pool is RCU protected to allow dereferences
          * from get_work_pool().
@@ -2930,7 +2932,7 @@ static void idle_worker_timeout(struct timer_list *t)
         raw_spin_unlock_irq(&pool->lock);
 
         if (do_cull)
-                queue_work(system_unbound_wq, &pool->idle_cull_work);
+                queue_work(system_dfl_wq, &pool->idle_cull_work);
 }
 
 /**
@@ -3078,6 +3080,31 @@ __acquires(&pool->lock)
                 goto restart;
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static void worker_lock_callback(struct worker_pool *pool)
+{
+        spin_lock(&pool->cb_lock);
+}
+
+static void worker_unlock_callback(struct worker_pool *pool)
+{
+        spin_unlock(&pool->cb_lock);
+}
+
+static void workqueue_callback_cancel_wait_running(struct worker_pool *pool)
+{
+        spin_lock(&pool->cb_lock);
+        spin_unlock(&pool->cb_lock);
+}
+
+#else
+
+static void worker_lock_callback(struct worker_pool *pool) { }
+static void worker_unlock_callback(struct worker_pool *pool) { }
+static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) { }
+
+#endif
+
 /**
  * manage_workers - manage worker pool
  * @worker: self
@@ -3557,6 +3584,7 @@ static void bh_worker(struct worker *worker)
         int nr_restarts = BH_WORKER_RESTARTS;
         unsigned long end = jiffies + BH_WORKER_JIFFIES;
 
+        worker_lock_callback(pool);
         raw_spin_lock_irq(&pool->lock);
         worker_leave_idle(worker);
@@ -3585,6 +3613,7 @@ static void bh_worker(struct worker *worker)
         worker_enter_idle(worker);
         kick_pool(pool);
         raw_spin_unlock_irq(&pool->lock);
+        worker_unlock_callback(pool);
 }
 
 /*
@@ -4222,17 +4251,17 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
             (data & WORK_OFFQ_BH)) {
                 /*
                  * On RT, prevent a live lock when %current preempted
-                 * soft interrupt processing or prevents ksoftirqd from
-                 * running by keeping flipping BH. If the BH work item
-                 * runs on a different CPU then this has no effect other
-                 * than doing the BH disable/enable dance for nothing.
-                 * This is copied from
-                 * kernel/softirq.c::tasklet_unlock_spin_wait().
+                 * soft interrupt processing by blocking on lock which
+                 * is owned by the thread invoking the callback.
                  */
                 while (!try_wait_for_completion(&barr.done)) {
                         if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-                                local_bh_disable();
-                                local_bh_enable();
+                                struct worker_pool *pool;
+
+                                guard(rcu)();
+                                pool = get_work_pool(work);
+                                if (pool)
+                                        workqueue_callback_cancel_wait_running(pool);
                         } else {
                                 cpu_relax();
                         }
@@ -4782,6 +4811,9 @@ static int init_worker_pool(struct worker_pool *pool)
         ida_init(&pool->worker_ida);
         INIT_HLIST_NODE(&pool->hash_node);
         pool->refcnt = 1;
+#ifdef CONFIG_PREEMPT_RT
+        spin_lock_init(&pool->cb_lock);
+#endif
 
         /* shouldn't fail above this point */
         pool->attrs = alloc_workqueue_attrs();
@@ -6046,7 +6078,6 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
         struct pool_workqueue *pwq;
         bool ret;
 
-        rcu_read_lock();
         preempt_disable();
 
         if (cpu == WORK_CPU_UNBOUND)
@@ -6056,7 +6087,6 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
         ret = !list_empty(&pwq->inactive_works);
         preempt_enable();
-        rcu_read_unlock();
 
         return ret;
 }
@@ -7546,8 +7576,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
         if (!thresh)
                 return;
 
-        rcu_read_lock();
-
         for_each_pool(pool, pi) {
                 unsigned long pool_ts, touched, ts;
@@ -7589,8 +7617,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
         }
 
-        rcu_read_unlock();
-
         if (lockup_detected)
                 show_all_workqueues();
@@ -7642,7 +7668,7 @@ static int wq_watchdog_param_set_thresh(const char *val,
         if (ret)
                 return ret;
 
-        if (system_wq)
+        if (system_percpu_wq)
                 wq_watchdog_set_thresh(thresh);
         else
                 wq_watchdog_thresh = thresh;
@@ -7802,22 +7828,22 @@ void __init workqueue_init_early(void)
                 ordered_wq_attrs[i] = attrs;
         }
 
-        system_wq = alloc_workqueue("events", 0, 0);
-        system_percpu_wq = alloc_workqueue("events", 0, 0);
-        system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
-        system_long_wq = alloc_workqueue("events_long", 0, 0);
+        system_wq = alloc_workqueue("events", WQ_PERCPU, 0);
+        system_percpu_wq = alloc_workqueue("events", WQ_PERCPU, 0);
+        system_highpri_wq = alloc_workqueue("events_highpri",
+                                            WQ_HIGHPRI | WQ_PERCPU, 0);
+        system_long_wq = alloc_workqueue("events_long", WQ_PERCPU, 0);
         system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
         system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
         system_freezable_wq = alloc_workqueue("events_freezable",
-                                              WQ_FREEZABLE, 0);
+                                              WQ_FREEZABLE | WQ_PERCPU, 0);
         system_power_efficient_wq = alloc_workqueue("events_power_efficient",
-                                              WQ_POWER_EFFICIENT, 0);
+                                              WQ_POWER_EFFICIENT | WQ_PERCPU, 0);
         system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
-                                              WQ_FREEZABLE | WQ_POWER_EFFICIENT,
-                                              0);
-        system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
+                                              WQ_FREEZABLE | WQ_POWER_EFFICIENT | WQ_PERCPU, 0);
+        system_bh_wq = alloc_workqueue("events_bh", WQ_BH | WQ_PERCPU, 0);
         system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
-                                               WQ_BH | WQ_HIGHPRI, 0);
+                                               WQ_BH | WQ_HIGHPRI | WQ_PERCPU, 0);
         BUG_ON(!system_wq || !system_percpu_wq|| !system_highpri_wq || !system_long_wq ||
                !system_unbound_wq || !system_freezable_wq || !system_dfl_wq ||
                !system_power_efficient_wq ||