From 7a4f92d39f66f890cbb157dd4d7daf6a9298324a Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Tue, 16 Sep 2025 10:29:04 +0200
Subject: [PATCH 1/3] fs: replace use of system_unbound_wq with system_dfl_wq

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the
API.

system_unbound_wq should be the default workqueue so as not to enforce
locality constraints for random work whenever it's not required.

Add system_dfl_wq to encourage its use when unbound work should be
used. The old system_unbound_wq will be kept for a few release cycles.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://lore.kernel.org/20250916082906.77439-2-marco.crivellari@suse.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
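For illustration, a minimal caller-side sketch of the convention this
patch encourages (not part of the series; the example_work item and its
handler are hypothetical, while queue_work() and system_dfl_wq are the
interfaces discussed above):

	#include <linux/workqueue.h>

	static void example_fn(struct work_struct *work)
	{
		/* Runs on an unbound worker; no CPU locality is implied. */
	}

	static DECLARE_WORK(example_work, example_fn);

	static void example_enqueue(void)
	{
		/*
		 * schedule_work() hides the per-CPU system_wq. When locality
		 * is not required, queue explicitly on the unbound default
		 * rather than the old system_unbound_wq alias:
		 */
		queue_work(system_dfl_wq, &example_work);
	}
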
 fs/afs/callback.c                | 4 ++--
 fs/afs/write.c                   | 2 +-
 fs/bcachefs/btree_write_buffer.c | 2 +-
 fs/bcachefs/io_read.c            | 8 ++++----
 fs/bcachefs/journal_io.c         | 2 +-
 fs/btrfs/block-group.c           | 2 +-
 fs/btrfs/extent_map.c            | 2 +-
 fs/btrfs/space-info.c            | 4 ++--
 fs/btrfs/zoned.c                 | 2 +-
 fs/coredump.c                    | 2 +-
 fs/ext4/mballoc.c                | 2 +-
 fs/netfs/misc.c                  | 2 +-
 fs/netfs/objects.c               | 2 +-
 fs/nfsd/filecache.c              | 2 +-
 fs/notify/mark.c                 | 4 ++--
 fs/quota/dquot.c                 | 2 +-
 16 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 69e1dd55b160..894d2bad6b6c 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -42,7 +42,7 @@ static void afs_volume_init_callback(struct afs_volume *volume)
 	list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
 		if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
 			afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
-			queue_work(system_unbound_wq, &vnode->cb_work);
+			queue_work(system_dfl_wq, &vnode->cb_work);
 		}
 	}
 
@@ -90,7 +90,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
 		if (reason != afs_cb_break_for_deleted &&
 		    vnode->status.type == AFS_FTYPE_FILE &&
 		    atomic_read(&vnode->cb_nr_mmap))
-			queue_work(system_unbound_wq, &vnode->cb_work);
+			queue_work(system_dfl_wq, &vnode->cb_work);
 
 		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
 	} else {
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 2e7526ea883a..93ad86ff3345 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -172,7 +172,7 @@ static void afs_issue_write_worker(struct work_struct *work)
 void afs_issue_write(struct netfs_io_subrequest *subreq)
 {
 	subreq->work.func = afs_issue_write_worker;
-	if (!queue_work(system_unbound_wq, &subreq->work))
+	if (!queue_work(system_dfl_wq, &subreq->work))
 		WARN_ON_ONCE(1);
 }
 
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 4b095235a0d2..0afb44ce1a85 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -827,7 +827,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_
 
 	if (bch2_btree_write_buffer_should_flush(c) &&
 	    __enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer) &&
-	    !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
+	    !queue_work(system_dfl_wq, &c->btree_write_buffer.flush_work))
 		enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
 
 	if (dst->wb == &wb->flushing)
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index e0874ad9a6cf..460e2e6341f1 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -684,7 +684,7 @@ static void bch2_rbio_error(struct bch_read_bio *rbio,
 
 	if (bch2_err_matches(ret, BCH_ERR_data_read_retry)) {
 		bch2_rbio_punt(rbio, bch2_rbio_retry,
-			       RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+			       RBIO_CONTEXT_UNBOUND, system_dfl_wq);
 	} else {
 		rbio = bch2_rbio_free(rbio);
 
@@ -921,10 +921,10 @@ static void __bch2_read_endio(struct work_struct *work)
 	bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR);
 	goto out;
 decompression_err:
-	bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+	bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_dfl_wq);
 	goto out;
 decrypt_err:
-	bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+	bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_dfl_wq);
 	goto out;
 }
 
@@ -963,7 +963,7 @@ static void bch2_read_endio(struct bio *bio)
 	    rbio->promote ||
 	    crc_is_compressed(rbio->pick.crc) ||
 	    bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
-		context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
+		context = RBIO_CONTEXT_UNBOUND, wq = system_dfl_wq;
 	else if (rbio->pick.crc.csum_type)
 		context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
 
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 9e028dbcc3d0..29bea8e0e495 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1362,7 +1362,7 @@ int bch2_journal_read(struct bch_fs *c,
 					     BCH_DEV_READ_REF_journal_read))
 			closure_call(&ca->journal.read,
 				     bch2_journal_read_device,
-				     system_unbound_wq,
+				     system_dfl_wq,
 				     &jlist.cl);
 		else
 			degraded = true;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9bf282d2453c..9a0af7e4a935 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2031,7 +2031,7 @@ void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
 	btrfs_reclaim_sweep(fs_info);
 	spin_lock(&fs_info->unused_bgs_lock);
 	if (!list_empty(&fs_info->reclaim_bgs))
-		queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
+		queue_work(system_dfl_wq, &fs_info->reclaim_bgs_work);
 	spin_unlock(&fs_info->unused_bgs_lock);
 }
 
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 57f52585a6dd..9a5a497edc97 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1372,7 +1372,7 @@ void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
 	if (atomic64_cmpxchg(&fs_info->em_shrinker_nr_to_scan, 0, nr_to_scan) != 0)
 		return;
 
-	queue_work(system_unbound_wq, &fs_info->em_shrinker_work);
+	queue_work(system_dfl_wq, &fs_info->em_shrinker_work);
 }
 
 void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 0481c693ac2e..c573d80550ad 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1830,7 +1830,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 						  space_info->flags,
 						  orig_bytes, flush,
 						  "enospc");
-			queue_work(system_unbound_wq, async_work);
+			queue_work(system_dfl_wq, async_work);
 		}
 	} else {
 		list_add_tail(&ticket.list,
@@ -1847,7 +1847,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 		    need_preemptive_reclaim(fs_info, space_info)) {
 			trace_btrfs_trigger_flush(fs_info, space_info->flags,
 						  orig_bytes, flush, "preempt");
-			queue_work(system_unbound_wq,
+			queue_work(system_dfl_wq,
 				   &fs_info->preempt_reclaim_work);
 		}
 	}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 245e813ecd78..0f493db7ae44 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2488,7 +2488,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
 	refcount_inc(&eb->refs);
 	bg->last_eb = eb;
 	INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
-	queue_work(system_unbound_wq, &bg->zone_finish_work);
+	queue_work(system_dfl_wq, &bg->zone_finish_work);
 }
 
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
diff --git a/fs/coredump.c b/fs/coredump.c
index fedbead956ed..b22f99cb6818 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -635,7 +635,7 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
 
 	/*
 	 * Usermode helpers are childen of either
-	 * system_unbound_wq or of kthreadd. So we know that
+	 * system_dfl_wq or of kthreadd. So we know that
 	 * we're starting off with a clean file descriptor
 	 * table. So we should always be able to use
 	 * COREDUMP_PIDFD_NUMBER as our file descriptor value.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5898d92ba19f..8b18802e83eb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3995,7 +3995,7 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 		list_splice_tail(&freed_data_list, &sbi->s_discard_list);
 		spin_unlock(&sbi->s_md_lock);
 		if (wake)
-			queue_work(system_unbound_wq, &sbi->s_discard_work);
+			queue_work(system_dfl_wq, &sbi->s_discard_work);
 	} else {
 		list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
 			kmem_cache_free(ext4_free_data_cachep, entry);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 20748bcfbf59..486166460e17 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -321,7 +321,7 @@ void netfs_wake_collector(struct netfs_io_request *rreq)
 {
 	if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
 	    !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
-		queue_work(system_unbound_wq, &rreq->work);
+		queue_work(system_dfl_wq, &rreq->work);
 	} else {
 		trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
 		wake_up(&rreq->waitq);
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index e8c99738b5bb..2ebe56b24ddd 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -163,7 +163,7 @@ void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
 		dead = __refcount_dec_and_test(&rreq->ref, &r);
 		trace_netfs_rreq_ref(debug_id, r - 1, what);
 		if (dead)
-			WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work));
+			WARN_ON(!queue_work(system_dfl_wq, &rreq->cleanup_work));
 	}
 }
 
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 732abf6b92a5..85ca663c052c 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -113,7 +113,7 @@ static void
 nfsd_file_schedule_laundrette(void)
 {
 	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
-		queue_delayed_work(system_unbound_wq, &nfsd_filecache_laundrette,
+		queue_delayed_work(system_dfl_wq, &nfsd_filecache_laundrette,
 				   NFSD_LAUNDRETTE_DELAY);
 }
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 798340db69d7..55a03bb05aa1 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -428,7 +428,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 		conn->destroy_next = connector_destroy_list;
 		connector_destroy_list = conn;
 		spin_unlock(&destroy_lock);
-		queue_work(system_unbound_wq, &connector_reaper_work);
+		queue_work(system_dfl_wq, &connector_reaper_work);
 	}
 	/*
 	 * Note that we didn't update flags telling whether inode cares about
@@ -439,7 +439,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	spin_lock(&destroy_lock);
 	list_add(&mark->g_list, &destroy_list);
 	spin_unlock(&destroy_lock);
-	queue_delayed_work(system_unbound_wq, &reaper_work,
+	queue_delayed_work(system_dfl_wq, &reaper_work,
 			   FSNOTIFY_REAPER_DELAY);
 }
 EXPORT_SYMBOL_GPL(fsnotify_put_mark);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index df4a9b348769..afa15a214538 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -881,7 +881,7 @@ void dqput(struct dquot *dquot)
 	put_releasing_dquots(dquot);
 	atomic_dec(&dquot->dq_count);
 	spin_unlock(&dq_list_lock);
-	queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
+	queue_delayed_work(system_dfl_wq, &quota_release_work, 1);
 }
 EXPORT_SYMBOL(dqput);

From 4ef64db060619be040351e3960e151e5fef3f895 Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Tue, 16 Sep 2025 10:29:05 +0200
Subject: [PATCH 2/3] fs: replace use of system_wq with system_percpu_wq

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the
API.

system_wq is a per-CPU workqueue, yet nothing in its name tells about
that CPU-affinity constraint, which is very often not required by
users. Make the constraint explicit by switching the fs subsystem to
system_percpu_wq. The old wq will be kept for a few release cycles.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://lore.kernel.org/20250916082906.77439-3-marco.crivellari@suse.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
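For illustration, a minimal sketch of a delayed-work user under the new
naming (not part of the series; the poll_work item, poll_fn() handler
and HZ interval are hypothetical, while queue_delayed_work() and
system_percpu_wq are the interfaces discussed above):

	#include <linux/workqueue.h>

	static void poll_fn(struct work_struct *work);
	static DECLARE_DELAYED_WORK(poll_work, poll_fn);

	static void poll_fn(struct work_struct *work)
	{
		/* ... periodic housekeeping ... */

		/*
		 * Re-arm on the per-CPU default. Behavior matches the old
		 * system_wq; the name now states the CPU-affinity constraint.
		 */
		queue_delayed_work(system_percpu_wq, &poll_work, HZ);
	}
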
 fs/aio.c            | 2 +-
 fs/fs-writeback.c   | 2 +-
 fs/fuse/dev.c       | 2 +-
 fs/fuse/inode.c     | 2 +-
 fs/nfs/namespace.c  | 2 +-
 fs/nfs/nfs4renewd.c | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 7fc7b6221312..6002617f078c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -636,7 +636,7 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 
 	/* Synchronize against RCU protected table->table[] dereferences */
 	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
-	queue_rcu_work(system_wq, &ctx->free_rwork);
+	queue_rcu_work(system_percpu_wq, &ctx->free_rwork);
 }
 
 /*
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cc57367fb641..cf51a265bf27 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2442,7 +2442,7 @@ static int dirtytime_interval_handler(const struct ctl_table *table, int write,
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret == 0 && write)
-		mod_delayed_work(system_wq, &dirtytime_work, 0);
+		mod_delayed_work(system_percpu_wq, &dirtytime_work, 0);
 	return ret;
 }
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e80cd8f2c049..8520eb94c527 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -119,7 +119,7 @@ void fuse_check_timeout(struct work_struct *work)
 		goto abort_conn;
 
 out:
-	queue_delayed_work(system_wq, &fc->timeout.work,
+	queue_delayed_work(system_percpu_wq, &fc->timeout.work,
 			   fuse_timeout_timer_freq);
 	return;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ecb869e895ab..b12cd19a9bca 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1273,7 +1273,7 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout)
 {
 	fc->timeout.req_timeout = secs_to_jiffies(timeout);
 	INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
-	queue_delayed_work(system_wq, &fc->timeout.work,
+	queue_delayed_work(system_percpu_wq, &fc->timeout.work,
 			   fuse_timeout_timer_freq);
 }
 
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7f1ec9c67ff2..f9a3a1fbf44c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -335,7 +335,7 @@ static int param_set_nfs_timeout(const char *val, const struct kernel_param *kp)
 		num *= HZ;
 		*((int *)kp->arg) = num;
 		if (!list_empty(&nfs_automount_list))
-			mod_delayed_work(system_wq, &nfs_automount_task, num);
+			mod_delayed_work(system_percpu_wq, &nfs_automount_task, num);
 	} else {
 		*((int *)kp->arg) = -1*HZ;
 		cancel_delayed_work(&nfs_automount_task);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index db3811af0796..18ae614e5a6c 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -122,7 +122,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 		timeout = 5 * HZ;
 	dprintk("%s: requeueing work. Lease period = %ld\n",
 		__func__, (timeout + HZ - 1) / HZ);
-	mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
+	mod_delayed_work(system_percpu_wq, &clp->cl_renewd, timeout);
 	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }

From 69635d7f4b344e6f5344bba3c3de92e4fb8b0d2a Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Tue, 16 Sep 2025 10:29:06 +0200
Subject: [PATCH 3/3] fs: WQ_PERCPU added to alloc_workqueue users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, if a user enqueues a work item using schedule_delayed_work(),
the workqueue used is "system_wq" (a per-CPU workqueue), while
queue_delayed_work() uses WORK_CPU_UNBOUND (used when a CPU is not
specified). The same applies to schedule_work(), which uses system_wq,
while queue_work() again makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the
API.

alloc_workqueue() treats all queues as per-CPU by default, while
unbound workqueues must opt in via WQ_UNBOUND. This default is
suboptimal: most workloads benefit from unbound queues, allowing the
scheduler to place worker threads where they’re needed and reducing
noise when CPUs are isolated.

This patch adds the new WQ_PERCPU flag to all fs subsystem users to
explicitly request per-CPU behavior. Both flags coexist for one release
cycle to allow callers to transition their calls. Once migration is
complete, WQ_UNBOUND can be removed and unbound will become the
implicit default.

With the introduction of the WQ_PERCPU flag (equivalent to
!WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly
specify WQ_UNBOUND must now use WQ_PERCPU. All existing users have been
updated accordingly.
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://lore.kernel.org/20250916082906.77439-4-marco.crivellari@suse.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
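For illustration, a minimal sketch of the allocation-side rule (not
part of the series; the "example" queue name and example_wq pointer are
hypothetical, while alloc_workqueue(), WQ_PERCPU and WQ_UNBOUND are the
flags discussed above):

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;

	static int __init example_init(void)
	{
		/*
		 * A flags value of 0 silently meant "per-CPU". Spell the
		 * constraint out with WQ_PERCPU; callers with no locality
		 * requirement should pass WQ_UNBOUND instead.
		 */
		example_wq = alloc_workqueue("example", WQ_PERCPU, 0);
		return example_wq ? 0 : -ENOMEM;
	}
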
 fs/afs/main.c                  |  4 ++--
 fs/bcachefs/super.c            | 10 +++++-----
 fs/btrfs/disk-io.c             |  2 +-
 fs/ceph/super.c                |  2 +-
 fs/dlm/lowcomms.c              |  2 +-
 fs/dlm/main.c                  |  2 +-
 fs/fs-writeback.c              |  2 +-
 fs/gfs2/main.c                 |  5 +++--
 fs/gfs2/ops_fstype.c           |  6 ++++--
 fs/ocfs2/dlm/dlmdomain.c       |  3 ++-
 fs/ocfs2/dlmfs/dlmfs.c         |  3 ++-
 fs/smb/client/cifsfs.c         | 16 +++++++++++-----
 fs/smb/server/ksmbd_work.c     |  2 +-
 fs/smb/server/transport_rdma.c |  3 ++-
 fs/super.c                     |  3 ++-
 fs/verity/verify.c             |  2 +-
 fs/xfs/xfs_log.c               |  3 +--
 fs/xfs/xfs_mru_cache.c         |  3 ++-
 fs/xfs/xfs_super.c             | 15 ++++++++-------
 19 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/fs/afs/main.c b/fs/afs/main.c
index 02475d415d88..e6bb8237db98 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -169,13 +169,13 @@ static int __init afs_init(void)
 
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
 
-	afs_wq = alloc_workqueue("afs", 0, 0);
+	afs_wq = alloc_workqueue("afs", WQ_PERCPU, 0);
 	if (!afs_wq)
 		goto error_afs_wq;
 	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
 	if (!afs_async_calls)
 		goto error_async;
-	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+	afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
 	if (!afs_lock_manager)
 		goto error_lockmgr;
 
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index c46b1053a02c..f2417d298b84 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -801,13 +801,13 @@ int bch2_fs_init_rw(struct bch_fs *c)
 	if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
 				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
 	    !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
-				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 1)) ||
 	    !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
-				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_PERCPU, 1)) ||
 	    !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
-				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 1)) ||
 	    !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
-				WQ_FREEZABLE, 0)))
+				WQ_FREEZABLE|WQ_PERCPU, 0)))
 		return bch_err_throw(c, ENOMEM_fs_other_alloc);
 
 	int ret = bch2_fs_btree_interior_update_init(c) ?:
@@ -975,7 +975,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
 		sizeof(struct sort_iter_set);
 
 	if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
-				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
+				WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 512)) ||
 	    enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR,
 				bch2_writes_disabled) ||
 	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 70fc4e7cc5a0..aa4393eba997 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1958,7 +1958,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
 {
 	u32 max_active = fs_info->thread_pool_size;
 	unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
-	unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
+	unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU;
 
 	fs_info->workers = btrfs_alloc_workqueue(fs_info, "worker",
 						 flags, max_active, 16);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index c3eb651862c5..2dc64515f259 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -862,7 +862,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
 	if (!fsc->inode_wq)
 		goto fail_client;
-	fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
+	fsc->cap_wq = alloc_workqueue("ceph-cap", WQ_PERCPU, 1);
 	if (!fsc->cap_wq)
 		goto fail_inode_wq;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index e4373bce1bc2..9a0b6c2b6b01 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1703,7 +1703,7 @@ static int work_start(void)
 		return -ENOMEM;
 	}
 
-	process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH, 0);
+	process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH | WQ_PERCPU, 0);
 	if (!process_workqueue) {
 		log_print("can't start dlm_process");
 		destroy_workqueue(io_workqueue);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 4887c8a05318..a44d16da7187 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -52,7 +52,7 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_user;
 
-	dlm_wq = alloc_workqueue("dlm_wq", 0, 0);
+	dlm_wq = alloc_workqueue("dlm_wq", WQ_PERCPU, 0);
 	if (!dlm_wq) {
 		error = -ENOMEM;
 		goto out_plock;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cf51a265bf27..4b1a53a3266b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1180,7 +1180,7 @@ void cgroup_writeback_umount(struct super_block *sb)
 
 static int __init cgroup_writeback_init(void)
 {
-	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+	isw_wq = alloc_workqueue("inode_switch_wbs", WQ_PERCPU, 0);
 	if (!isw_wq)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0727f60ad028..9d65719353fa 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -151,7 +151,8 @@ static int __init init_gfs2_fs(void)
 
 	error = -ENOMEM;
 	gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
-					   WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
+					   WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU,
+					   0);
 	if (!gfs2_recovery_wq)
 		goto fail_wq1;
 
@@ -160,7 +161,7 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_control_wq)
 		goto fail_wq2;
 
-	gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
+	gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", WQ_PERCPU, 0);
 	if (!gfs2_freeze_wq)
 		goto fail_wq3;
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index efe99b732551..05f936cc5db7 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1193,13 +1193,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 
 	error = -ENOMEM;
 	sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s",
-			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
+			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE | WQ_PERCPU,
+			0,
 			sdp->sd_fsname);
 	if (!sdp->sd_glock_wq)
 		goto fail_iput;
 
 	sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
-			WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
+			WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, 0,
+			sdp->sd_fsname);
 	if (!sdp->sd_delete_wq)
 		goto fail_glock_wq;
 
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 2018501b2249..2347a50f079b 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1876,7 +1876,8 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 	dlm_debug_init(dlm);
 
 	snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
-	dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
+	dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM | WQ_PERCPU,
+					  0);
 	if (!dlm->dlm_worker) {
 		status = -ENOMEM;
 		mlog_errno(status);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 5130ec44e5e1..0b730535b2c8 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -595,7 +595,8 @@ static int __init init_dlmfs_fs(void)
 	}
 	cleanup_inode = 1;
 
-	user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
+	user_dlm_worker = alloc_workqueue("user_dlm",
+					  WQ_MEM_RECLAIM | WQ_PERCPU, 0);
 	if (!user_dlm_worker) {
 		status = -ENOMEM;
 		goto bail;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 3bd85ab2deb1..7882b0346863 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1881,7 +1881,9 @@ init_cifs(void)
 		cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n");
 	}
 
-	cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	cifsiod_wq = alloc_workqueue("cifsiod",
+				     WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+				     0);
 	if (!cifsiod_wq) {
 		rc = -ENOMEM;
 		goto out_clean_proc;
@@ -1909,28 +1911,32 @@ init_cifs(void)
 	}
 
 	cifsoplockd_wq = alloc_workqueue("cifsoplockd",
-					 WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+					 WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+					 0);
 	if (!cifsoplockd_wq) {
 		rc = -ENOMEM;
 		goto out_destroy_fileinfo_put_wq;
 	}
 
 	deferredclose_wq = alloc_workqueue("deferredclose",
-					   WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+					   WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+					   0);
 	if (!deferredclose_wq) {
 		rc = -ENOMEM;
 		goto out_destroy_cifsoplockd_wq;
 	}
 
 	serverclose_wq = alloc_workqueue("serverclose",
-					 WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+					 WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+					 0);
 	if (!serverclose_wq) {
 		rc = -ENOMEM;
 		goto out_destroy_deferredclose_wq;
 	}
 
 	cfid_put_wq = alloc_workqueue("cfid_put_wq",
-				      WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+				      WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
+				      0);
 	if (!cfid_put_wq) {
 		rc = -ENOMEM;
 		goto out_destroy_serverclose_wq;
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 72b00ca6e455..4a71f46d7020 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -78,7 +78,7 @@ int ksmbd_work_pool_init(void)
 
 int ksmbd_workqueue_init(void)
 {
-	ksmbd_wq = alloc_workqueue("ksmbd-io", 0, 0);
+	ksmbd_wq = alloc_workqueue("ksmbd-io", WQ_PERCPU, 0);
 	if (!ksmbd_wq)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 8d366db5f605..9394b358e795 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2177,7 +2177,8 @@ int ksmbd_rdma_init(void)
 	 * for lack of credits
 	 */
 	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
-					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+					WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
+					0);
 	if (!smb_direct_wq)
 		return -ENOMEM;
 
diff --git a/fs/super.c b/fs/super.c
index 7f876f32343a..5cb3f41e42ca 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -2314,7 +2314,8 @@ int sb_init_dio_done_wq(struct super_block *sb)
 {
 	struct workqueue_struct *old;
 	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
-						      WQ_MEM_RECLAIM, 0,
+						      WQ_MEM_RECLAIM | WQ_PERCPU,
+						      0,
 						      sb->s_id);
 	if (!wq)
 		return -ENOMEM;
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index a1f00c3fd3b2..628c23710f9f 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -355,7 +355,7 @@ void __init fsverity_init_workqueue(void)
 	 * latency on ARM64.
 	 */
 	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
-						  WQ_HIGHPRI,
+						  WQ_HIGHPRI | WQ_PERCPU,
 						  num_online_cpus());
 	if (!fsverity_read_workqueue)
 		panic("failed to allocate fsverity_read_queue");
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c8a57e21a1d3..0da19472d603 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1489,8 +1489,7 @@ xlog_alloc_log(
 	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */
 
 	log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
-				    WQ_HIGHPRI),
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_PERCPU),
 			0, mp->m_super->s_id);
 	if (!log->l_ioend_workqueue)
 		goto out_free_iclog;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 866c71d9fbae..73b7e72944e4 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -293,7 +293,8 @@ int
 xfs_mru_cache_init(void)
 {
 	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
-			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1);
+			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+			1);
 	if (!xfs_mru_reap_wq)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bb0a82635a77..43e9aab71610 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -578,19 +578,19 @@ xfs_init_mount_workqueues(
 	struct xfs_mount	*mp)
 {
 	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
 			1, mp->m_super->s_id);
 	if (!mp->m_buf_workqueue)
 		goto out;
 
 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
 			0, mp->m_super->s_id);
 	if (!mp->m_unwritten_workqueue)
 		goto out_destroy_buf;
 
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
 			0, mp->m_super->s_id);
 	if (!mp->m_reclaim_workqueue)
 		goto out_destroy_unwritten;
@@ -602,13 +602,14 @@ xfs_init_mount_workqueues(
 		goto out_destroy_reclaim;
 
 	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
 			1, mp->m_super->s_id);
 	if (!mp->m_inodegc_wq)
 		goto out_destroy_blockgc;
 
 	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
+			mp->m_super->s_id);
 	if (!mp->m_sync_workqueue)
 		goto out_destroy_inodegc;
 
@@ -2596,8 +2597,8 @@ xfs_init_workqueues(void)
 	 * AGs in all the filesystems mounted. Hence use the default large
 	 * max_active value for this workqueue.
 	 */
-	xfs_alloc_wq = alloc_workqueue("xfsalloc",
-			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
+	xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+			0);
 	if (!xfs_alloc_wq)
 		return -ENOMEM;