From 49783aca15fb73adb08e8ca5edc4dfb0a32540b9 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 31 Oct 2025 10:56:40 +0100 Subject: [PATCH 01/15] scsi: qla2xxx: Replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. The old system_unbound_wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251031095643.74246-2-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5ffd94586652..d48d8671c18a 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5280,7 +5280,7 @@ void qla24xx_sched_upd_fcport(fc_port_t *fcport) qla2x00_set_fcport_disc_state(fcport, DSC_UPD_FCPORT); spin_unlock_irqrestore(&fcport->vha->work_lock, flags); - queue_work(system_unbound_wq, &fcport->reg_work); + queue_work(system_dfl_wq, &fcport->reg_work); } static From cd87aa2e507abb02560043c8e0e5bb9967ae7d72 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 31 Oct 2025 10:56:40 +0100 Subject: [PATCH 02/15] scsi: scsi_transport_iscsi: Replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. The old system_unbound_wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251031095643.74246-2-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 743b4c792ceb..ed21c032bbc4 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3961,7 +3961,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) list_del_init(&session->sess_list); spin_unlock_irqrestore(&sesslock, flags); - queue_work(system_unbound_wq, &session->destroy_work); + queue_work(system_dfl_wq, &session->destroy_work); } break; case ISCSI_UEVENT_UNBIND_SESSION: From 0ba2fc767af711de1d830a61c8473168dcb31a8a Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Tue, 4 Nov 2025 11:45:18 +0100 Subject: [PATCH 03/15] scsi: target: sbp: Replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. This patch continues the effort to refactor worqueue APIs, which has begun with the change introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") system_dfl_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. The old system_unbound_wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251104104518.102130-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/target/sbp/sbp_target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 3b89b5a70331..b8457477cee9 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -730,7 +730,7 @@ static int tgt_agent_rw_orb_pointer(struct fw_card *card, int tcode, void *data, pr_debug("tgt_agent ORB_POINTER write: 0x%llx\n", agent->orb_pointer); - queue_work(system_unbound_wq, &agent->work); + queue_work(system_dfl_wq, &agent->work); return RCODE_COMPLETE; @@ -764,7 +764,7 @@ static int tgt_agent_rw_doorbell(struct fw_card *card, int tcode, void *data, pr_debug("tgt_agent DOORBELL\n"); - queue_work(system_unbound_wq, &agent->work); + queue_work(system_dfl_wq, &agent->work); return RCODE_COMPLETE; @@ -990,7 +990,7 @@ static void tgt_agent_fetch_work(struct work_struct *work) if (tgt_agent_check_active(agent) && !doorbell) { INIT_WORK(&req->work, tgt_agent_process_work); - queue_work(system_unbound_wq, &req->work); + queue_work(system_dfl_wq, &req->work); } else { /* don't process this request, just check next_ORB */ sbp_free_request(req); @@ -1618,7 +1618,7 @@ static void sbp_mgt_agent_rw(struct fw_card *card, agent->orb_offset = sbp2_pointer_to_addr(ptr); agent->request = req; - queue_work(system_unbound_wq, &agent->work); + queue_work(system_dfl_wq, &agent->work); rcode = RCODE_COMPLETE; } else if (tcode == TCODE_READ_BLOCK_REQUEST) { addr_to_sbp2_pointer(agent->orb_offset, ptr); From 5ca003bb438197791f6394d5c05e5abf403ada24 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 31 Oct 2025 10:56:41 +0100 Subject: [PATCH 04/15] scsi: qla2xxx: WQ_PERCPU added to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251031095643.74246-3-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d48d8671c18a..af51f1cf5daa 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3397,7 +3397,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) "req->req_q_in=%p req->req_q_out=%p rsp->rsp_q_in=%p rsp->rsp_q_out=%p.\n", req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out); - ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0); + ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (unlikely(!ha->wq)) { ret = -ENOMEM; goto probe_failed; From afad6b34defed8ca19f8593b692024e8e296287f Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 31 Oct 2025 10:56:42 +0100 Subject: [PATCH 05/15] scsi: scsi_dh_alua: WQ_PERCPU added to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251031095643.74246-4-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 1bf5948d1188..6fd89ae33059 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1300,7 +1300,7 @@ static int __init alua_init(void) { int r; - kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); + kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!kaluad_wq) return -ENOMEM; From f76e4e1e836df41f602fa4c85f64f09e2a99266e Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 31 Oct 2025 10:56:43 +0100 Subject: [PATCH 06/15] scsi: scsi_transport_fc: WQ_PERCPU added to alloc_workqueue users() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Reviewed-by: Justin Tee Link: https://patch.msgid.link/20251031095643.74246-5-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 3a821afee9bc..987befb02408 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -441,7 +441,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, fc_host->next_vport_number = 0; fc_host->npiv_vports_inuse = 0; - fc_host->work_q = alloc_workqueue("fc_wq_%d", 0, 0, shost->host_no); + fc_host->work_q = alloc_workqueue("fc_wq_%d", WQ_PERCPU, 0, + shost->host_no); if (!fc_host->work_q) return -ENOMEM; @@ -3088,7 +3089,7 @@ fc_remote_port_create(struct Scsi_Host *shost, int channel, spin_unlock_irqrestore(shost->host_lock, flags); - rport->devloss_work_q = alloc_workqueue("fc_dl_%d_%d", 0, 0, + rport->devloss_work_q = alloc_workqueue("fc_dl_%d_%d", WQ_PERCPU, 0, shost->host_no, rport->number); if (!rport->devloss_work_q) { printk(KERN_ERR "FC Remote Port alloc_workqueue failed\n"); From 84150ef06f897cc2c03456edb5ad5c2a03faf9f1 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Tue, 4 Nov 2025 12:08:08 +0100 Subject: [PATCH 07/15] scsi: lpfc: WQ_PERCPU added to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueue a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistentcy cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. This patch continues the effort to refactor worqueue APIs, which has begun with the change introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Reviewed-by: Justin Tee Link: https://patch.msgid.link/20251104110808.123424-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f206267d9ecd..45d43e9c5827 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -7950,7 +7950,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Allocate all driver workqueues here */ /* The lpfc_wq workqueue for deferred irq use */ - phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0); + phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!phba->wq) return -ENOMEM; From f0dc44177ac048e1b166661d520db4c833e6b40b Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 15:14:58 +0100 Subject: [PATCH 08/15] scsi: message: fusion: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107141458.225119-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptbase.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 738bc4e60a18..e60a8d3947c9 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -1857,7 +1857,8 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) INIT_DELAYED_WORK(&ioc->fault_reset_work, mpt_fault_reset_work); ioc->reset_work_q = - alloc_workqueue("mpt_poll_%d", WQ_MEM_RECLAIM, 0, ioc->id); + alloc_workqueue("mpt_poll_%d", WQ_MEM_RECLAIM | WQ_PERCPU, 0, + ioc->id); if (!ioc->reset_work_q) { printk(MYIOC_s_ERR_FMT "Insufficient memory to add adapter!\n", ioc->name); @@ -1984,7 +1985,9 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&ioc->fw_event_list); spin_lock_init(&ioc->fw_event_lock); - ioc->fw_event_q = alloc_workqueue("mpt/%d", WQ_MEM_RECLAIM, 0, ioc->id); + ioc->fw_event_q = alloc_workqueue("mpt/%d", + WQ_MEM_RECLAIM | WQ_PERCPU, 0, + ioc->id); if (!ioc->fw_event_q) { printk(MYIOC_s_ERR_FMT "Insufficient memory to add adapter!\n", ioc->name); From 6184ec8b633e507df39b2258f4627095a4dfd752 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 15:49:49 +0100 Subject: [PATCH 09/15] scsi: be2iscsi: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107144949.256894-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/be2iscsi/be_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index dc88bc46dcc0..a0e794ffc980 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5633,7 +5633,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, phba->ctrl.mcc_alloc_index = phba->ctrl.mcc_free_index = 0; - phba->wq = alloc_workqueue("beiscsi_%02x_wq", WQ_MEM_RECLAIM, 1, + phba->wq = alloc_workqueue("beiscsi_%02x_wq", + WQ_MEM_RECLAIM | WQ_PERCPU, 1, phba->shost->host_no); if (!phba->wq) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, From a43a2e48d534ffc6361ef5a026fc71e5f03696b8 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:01:54 +0100 Subject: [PATCH 10/15] scsi: bnx2fc: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107150155.267651-2-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 58da993251e9..0f68739d380a 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2695,7 +2695,7 @@ static int __init bnx2fc_mod_init(void) if (rc) goto detach_ft; - bnx2fc_wq = alloc_workqueue("bnx2fc", 0, 0); + bnx2fc_wq = alloc_workqueue("bnx2fc", WQ_PERCPU, 0); if (!bnx2fc_wq) { rc = -ENOMEM; goto release_bt; From e036dadf78f8c1bcbb3a4e5933f553c24fe5c162 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:01:55 +0100 Subject: [PATCH 11/15] scsi: qedf: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107150155.267651-3-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 6b1ebab36fa3..7792e00800ae 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3374,7 +3374,8 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "qedf->io_mempool=%p.\n", qedf->io_mempool); - qedf->link_update_wq = alloc_workqueue("qedf_%u_link", WQ_MEM_RECLAIM, + qedf->link_update_wq = alloc_workqueue("qedf_%u_link", + WQ_MEM_RECLAIM | WQ_PERCPU, 1, qedf->lport->host->host_no); INIT_DELAYED_WORK(&qedf->link_update, qedf_handle_link_update); INIT_DELAYED_WORK(&qedf->link_recovery, qedf_link_recovery); @@ -3585,7 +3586,8 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) ether_addr_copy(params.ll2_mac_address, qedf->mac); /* Start LL2 processing thread */ - qedf->ll2_recv_wq = alloc_workqueue("qedf_%d_ll2", WQ_MEM_RECLAIM, 1, + qedf->ll2_recv_wq = alloc_workqueue("qedf_%d_ll2", + WQ_MEM_RECLAIM | WQ_PERCPU, 1, host->host_no); if (!qedf->ll2_recv_wq) { QEDF_ERR(&(qedf->dbg_ctx), "Failed to LL2 workqueue.\n"); @@ -3628,7 +3630,8 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) } qedf->timer_work_queue = alloc_workqueue("qedf_%u_timer", - WQ_MEM_RECLAIM, 1, qedf->lport->host->host_no); + WQ_MEM_RECLAIM | WQ_PERCPU, 1, + qedf->lport->host->host_no); if (!qedf->timer_work_queue) { QEDF_ERR(&(qedf->dbg_ctx), "Failed to start timer " "workqueue.\n"); @@ -3641,7 +3644,8 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) sprintf(host_buf, "qedf_%u_dpc", qedf->lport->host->host_no); qedf->dpc_wq = - alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, host_buf); + alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 1, + host_buf); } INIT_DELAYED_WORK(&qedf->recovery_work, qedf_recovery_handler); @@ -4177,7 +4181,8 @@ static int __init qedf_init(void) goto err3; } - qedf_io_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, "qedf_io_wq"); + qedf_io_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 1, + "qedf_io_wq"); if (!qedf_io_wq) { QEDF_ERR(NULL, "Could not create qedf_io_wq.\n"); goto err4; From 42312d3acde57c37c9a688bef97c85618bd26cae Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:05:42 +0100 Subject: [PATCH 12/15] scsi: target: ibmvscsi: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107150542.271229-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 5a3787f27369..f259746bc804 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3533,7 +3533,8 @@ static int ibmvscsis_probe(struct vio_dev *vdev, init_completion(&vscsi->wait_idle); init_completion(&vscsi->unconfig); - vscsi->work_q = alloc_workqueue("ibmvscsis%s", WQ_MEM_RECLAIM, 1, + vscsi->work_q = alloc_workqueue("ibmvscsis%s", + WQ_MEM_RECLAIM | WQ_PERCPU, 1, dev_name(&vdev->dev)); if (!vscsi->work_q) { rc = -ENOMEM; From f60b8957d8cc18c8913812e693593debc3829b77 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:16:18 +0100 Subject: [PATCH 13/15] scsi: qedi: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107151618.281250-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index b168bb2178e9..56685ee22fdf 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2768,7 +2768,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) } qedi->offload_thread = alloc_workqueue("qedi_ofld%d", - WQ_MEM_RECLAIM, + WQ_MEM_RECLAIM | WQ_PERCPU, 1, qedi->shost->host_no); if (!qedi->offload_thread) { QEDI_ERR(&qedi->dbg_ctx, From 2e2e559390db56c36ebaa8db82fd16a2e367dfb6 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:40:08 +0100 Subject: [PATCH 14/15] scsi: target: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This change adds a new WQ_PERCPU flag to explicitly request alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107154008.304127-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 4 ++-- drivers/target/target_core_xcopy.c | 2 +- drivers/target/tcm_fc/tfc_conf.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0a76bdfe5528..ca571076c15b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -126,12 +126,12 @@ int init_se_kmem_caches(void) } target_completion_wq = alloc_workqueue("target_completion", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!target_completion_wq) goto out_free_lba_map_mem_cache; target_submission_wq = alloc_workqueue("target_submission", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!target_submission_wq) goto out_free_completion_wq; diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 877ce58c0a70..93534a6e14b7 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -462,7 +462,7 @@ static const struct target_core_fabric_ops xcopy_pt_tfo = { int target_xcopy_setup_pt(void) { - xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!xcopy_wq) { pr_err("Unable to allocate xcopy_wq\n"); return -ENOMEM; diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 639fc358ed0f..f686d95d3273 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -250,7 +250,7 @@ static struct se_portal_group *ft_add_tpg(struct se_wwn *wwn, const char *name) tpg->lport_wwn = ft_wwn; INIT_LIST_HEAD(&tpg->lun_list); - wq = alloc_workqueue("tcm_fc", 0, 1); + wq = alloc_workqueue("tcm_fc", WQ_PERCPU, 1); if (!wq) { kfree(tpg); return NULL; From 8d5cad38cf7da7848a2f4d7ca5adb4110b2cd968 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 7 Nov 2025 16:52:57 +0100 Subject: [PATCH 15/15] scsi: pm80xx: Add WQ_PERCPU to alloc_workqueue() users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") This adds a new WQ_PERCPU flag to explicitly request to alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251107155257.316728-1-marco.crivellari@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 8ff4b89ff81e..9acca83d6958 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -1534,7 +1534,7 @@ static int __init pm8001_init(void) if (pm8001_use_tasklet && !pm8001_use_msix) pm8001_use_tasklet = false; - pm8001_wq = alloc_workqueue("pm80xx", 0, 0); + pm8001_wq = alloc_workqueue("pm80xx", WQ_PERCPU, 0); if (!pm8001_wq) goto err;