From eddb98ad9364b4e778768785d46cfab04ce52100 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 13:43:00 +0900 Subject: [PATCH 1/8] ata: libata-eh: correctly handle deferred qc timeouts A deferred qc may timeout while waiting for the device queue to drain to be submitted. In such case, since the qc is not active, ata_scsi_cmd_error_handler() ends up calling scsi_eh_finish_cmd(), which frees the qc. But as the port deferred_qc field still references this finished/freed qc, the deferred qc work may eventually attempt to call ata_qc_issue() against this invalid qc, leading to errors such as reported by UBSAN (syzbot run): UBSAN: shift-out-of-bounds in drivers/ata/libata-core.c:5166:24 shift exponent 4210818301 is too large for 64-bit type 'long long unsigned int' ... Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120 ubsan_epilogue+0xa/0x30 lib/ubsan.c:233 __ubsan_handle_shift_out_of_bounds+0x279/0x2a0 lib/ubsan.c:494 ata_qc_issue.cold+0x38/0x9f drivers/ata/libata-core.c:5166 ata_scsi_deferred_qc_work+0x154/0x1f0 drivers/ata/libata-scsi.c:1679 process_one_work+0x9d7/0x1920 kernel/workqueue.c:3275 process_scheduled_works kernel/workqueue.c:3358 [inline] worker_thread+0x5da/0xe40 kernel/workqueue.c:3439 kthread+0x370/0x450 kernel/kthread.c:467 ret_from_fork+0x754/0xd80 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fix this by checking if the qc of a timed out SCSI command is a deferred one, and in such case, clear the port deferred_qc field and finish the SCSI command with DID_TIME_OUT. Reported-by: syzbot+1f77b8ca15336fff21ff@syzkaller.appspotmail.com Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-eh.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 72a22b6c9682..b373cceb95d2 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -640,12 +640,28 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, set_host_byte(scmd, DID_OK); ata_qc_for_each_raw(ap, qc, i) { - if (qc->flags & ATA_QCFLAG_ACTIVE && - qc->scsicmd == scmd) + if (qc->scsicmd != scmd) + continue; + if ((qc->flags & ATA_QCFLAG_ACTIVE) || + qc == ap->deferred_qc) break; } - if (i < ATA_MAX_QUEUE) { + if (qc == ap->deferred_qc) { + /* + * This is a deferred command that timed out while + * waiting for the command queue to drain. Since the qc + * is not active yet (deferred_qc is still set, so the + * deferred qc work has not issued the command yet), + * simply signal the timeout by finishing the SCSI + * command and clear the deferred qc to prevent the + * deferred qc work from issuing this qc. + */ + WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); + ap->deferred_qc = NULL; + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_finish_cmd(scmd, &ap->eh_done_q); + } else if (i < ATA_MAX_QUEUE) { /* the scmd has an associated qc */ if (!(qc->flags & ATA_QCFLAG_EH)) { /* which hasn't failed yet, timeout */ From 55db009926634b20955bd8abbee921adbc8d2cb4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 12:09:12 +0900 Subject: [PATCH 2/8] ata: libata-core: fix cancellation of a port deferred qc work cancel_work_sync() is a sleeping function so it cannot be called with the spin lock of a port being held. Move the call to this function in ata_port_detach() after EH completes, with the port lock released, together with other work cancellation calls. Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 11909417f017..ccbf320524da 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6269,10 +6269,6 @@ static void ata_port_detach(struct ata_port *ap) } } - /* Make sure the deferred qc work finished. */ - cancel_work_sync(&ap->deferred_qc_work); - WARN_ON(ap->deferred_qc); - /* Tell EH to disable all devices */ ap->pflags |= ATA_PFLAG_UNLOADING; ata_port_schedule_eh(ap); @@ -6283,9 +6279,11 @@ static void ata_port_detach(struct ata_port *ap) /* wait till EH commits suicide */ ata_port_wait_eh(ap); - /* it better be dead now */ + /* It better be dead now and not have any remaining deferred qc. */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); + WARN_ON(ap->deferred_qc); + cancel_work_sync(&ap->deferred_qc_work); cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); From b3b1d3ae1d87bc9398fb715c945968bf4c75a09a Mon Sep 17 00:00:00 2001 From: Maximilian Pezzullo Date: Wed, 4 Mar 2026 08:22:59 +0100 Subject: [PATCH 3/8] ata: libata-core: Disable LPM on ST1000DM010-2EP102 According to a user report, the ST1000DM010-2EP102 has problems with LPM, causing random system freezes. The drive belongs to the same BarraCuda family as the ST2000DM008-2FR102 which has the same issue. Cc: stable@vger.kernel.org Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Reported-by: Filippo Baiamonte Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221163 Signed-off-by: Maximilian Pezzullo Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ccbf320524da..76b012f544ea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4189,6 +4189,7 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { ATA_QUIRK_FIRMWARE_WARN }, /* Seagate disks with LPM issues */ + { "ST1000DM010-2EP102", NULL, ATA_QUIRK_NOLPM }, { "ST2000DM008-2FR102", NULL, ATA_QUIRK_NOLPM }, /* drives which fail FPDMA_AA activation (some may freeze afterwards) From aac9b27f7c1f2b2cf7f50a9ca633ecbbcaf22af9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 3 Mar 2026 11:03:42 +0100 Subject: [PATCH 4/8] ata: libata: cancel pending work after clearing deferred_qc Syzbot reported a WARN_ON() in ata_scsi_deferred_qc_work(), caused by ap->ops->qc_defer() returning non-zero before issuing the deferred qc. ata_scsi_schedule_deferred_qc() is called during each command completion. This function will check if there is a deferred QC, and if ap->ops->qc_defer() returns zero, meaning that it is possible to queue the deferred qc at this time (without being deferred), then it will queue the work which will issue the deferred qc. Once the work get to run, which can potentially be a very long time after the work was scheduled, there is a WARN_ON() if ap->ops->qc_defer() returns non-zero. While we hold the ap->lock both when assigning and clearing deferred_qc, and the work itself holds the ap->lock, the code currently does not cancel the work after clearing the deferred qc. This means that the following scenario can happen: 1) One or several NCQ commands are queued. 2) A non-NCQ command is queued, gets stored in ap->deferred_qc. 3) Last NCQ command gets completed, work is queued to issue the deferred qc. 4) Timeout or error happens, ap->deferred_qc is cleared. The queued work is currently NOT canceled. 5) Port is reset. 6) One or several NCQ commands are queued. 7) A non-NCQ command is queued, gets stored in ap->deferred_qc. 8) Work is finally run. Yet at this time, there is still NCQ commands in flight. The work in 8) really belongs to the non-NCQ command in 2), not to the non-NCQ command in 7). The reason why the work is executed when it is not supposed to, is because it was never canceled when ap->deferred_qc was cleared in 4). Thus, ensure that we always cancel the work after clearing ap->deferred_qc. Another potential fix would have been to let ata_scsi_deferred_qc_work() do nothing if ap->ops->qc_defer() returns non-zero. However, canceling the work when clearing ap->deferred_qc seems slightly more logical, as we hold the ap->lock when clearing ap->deferred_qc, so we know that the work cannot be holding the lock. (The function could be waiting for the lock, but that is okay since it will do nothing if ap->deferred_qc is not set.) Reported-by: syzbot+bcaf842a1e8ead8dfb89@syzkaller.appspotmail.com Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Fixes: eddb98ad9364 ("ata: libata-eh: correctly handle deferred qc timeouts") Reviewed-by: Igor Pylypiv Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-eh.c | 1 + drivers/ata/libata-scsi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b373cceb95d2..563432400f72 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -659,6 +659,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, */ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); ap->deferred_qc = NULL; + cancel_work(&ap->deferred_qc_work); set_host_byte(scmd, DID_TIME_OUT); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } else if (i < ATA_MAX_QUEUE) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index c0dd75a0287c..ad798e5246b4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1699,6 +1699,7 @@ void ata_scsi_requeue_deferred_qc(struct ata_port *ap) scmd = qc->scsicmd; ap->deferred_qc = NULL; + cancel_work(&ap->deferred_qc_work); ata_qc_free(qc); scmd->result = (DID_SOFT_ERROR << 16); scsi_done(scmd); From b92b0075ee1870f78f59ab1f7da7dbfdd718ad7a Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Thu, 5 Mar 2026 14:53:12 +0000 Subject: [PATCH 5/8] ata: libata-core: Add BRIDGE_OK quirk for QEMU drives Currently, whenever you boot with a QEMU drive over an AHCI interface, you get: [ 1.632121] ata1.00: applying bridge limits This happens due to the kernel not believing the given drive is SATA, since word 93 of IDENTIFY (ATA_ID_HW_CONFIG) is non-zero. The result is a pretty severe limit in max_hw_sectors_kb, which limits our IO sizes. QEMU has set word 93 erroneously for SATA drives but does not, in any way, emulate any of these real hardware details. There is no PATA drive and no SATA cable. As such, add a BRIDGE_OK quirk for QEMU HARDDISK. Special care is taken to limit this quirk to "2.5+", to allow for fixed future versions. This results in the max_hw_sectors being limited solely by the controller interface's limits. Which, for AHCI controllers, takes it from 128KB to 32767KB. Cc: stable@vger.kernel.org Signed-off-by: Pedro Falcato Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 76b012f544ea..6c4e567b6582 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4232,6 +4232,7 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { /* Devices that do not need bridging limits applied */ { "MTRON MSP-SATA*", NULL, ATA_QUIRK_BRIDGE_OK }, { "BUFFALO HD-QSU2/R5", NULL, ATA_QUIRK_BRIDGE_OK }, + { "QEMU HARDDISK", "2.5+", ATA_QUIRK_BRIDGE_OK }, /* Devices which aren't very happy with higher link speeds */ { "WD My Book", NULL, ATA_QUIRK_1_5_GBPS }, From ee0e6e69a772d601e152e5368a1da25d656122a8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Mar 2026 18:48:05 -0800 Subject: [PATCH 6/8] ata: libata-eh: Fix detection of deferred qc timeouts If the ata_qc_for_each_raw() loop finishes without finding a matching SCSI command for any QC, the variable qc will hold a pointer to the last element examined, which has the tag i == ATA_MAX_QUEUE - 1. This qc can match the port deferred QC (ap->deferred_qc). If that happens, the condition qc == ap->deferred_qc evaluates to true despite the loop not breaking with a match on the SCSI command for this QC. In that case, the error handler mistakenly intercepts a command that has not been issued yet and that has not timed out, and thus erroneously returning a timeout error. Fix the problem by checking for i < ATA_MAX_QUEUE in addition to qc == ap->deferred_qc. The problem was found by an experimental code review agent based on gemini-3.1-pro while reviewing backports into v6.18.y. Assisted-by: Gemini:gemini-3.1-pro Fixes: eddb98ad9364 ("ata: libata-eh: correctly handle deferred qc timeouts") Signed-off-by: Guenter Roeck [cassel: modified commit log as suggested by Damien] Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-eh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 563432400f72..23be85418b3b 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -647,7 +647,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, break; } - if (qc == ap->deferred_qc) { + if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) { /* * This is a deferred command that timed out while * waiting for the command queue to drain. Since the qc From ce5ae93d1a216680460040c7c0465a6e3b629dec Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 15 Mar 2026 07:24:15 +0900 Subject: [PATCH 7/8] ata: libata-core: disable LPM on ADATA SU680 SSD ADATA SU680 SSDs suffer from NCQ read and write commands timeouts or bus errors when link power management (LPM) is enabled. Flag these devices with the ATA_QUIRK_NOLPM quirk to prevent the use of LPM and avoid these command failures. Reported-by: Mohammad Khaled Bayan Closes: https://bugs.launchpad.net/ubuntu/+source/linux-hwe-6.17/+bug/2144060 Cc: stable@vger.kernel.org Tested-by: Mohammad-Khaled Bayan Signed-off-by: Damien Le Moal Reviewed-by: Martin K. Petersen Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6c4e567b6582..374993031895 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4188,6 +4188,9 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_QUIRK_NONCQ | ATA_QUIRK_FIRMWARE_WARN }, + /* ADATA devices with LPM issues. */ + { "ADATA SU680", NULL, ATA_QUIRK_NOLPM }, + /* Seagate disks with LPM issues */ { "ST1000DM010-2EP102", NULL, ATA_QUIRK_NOLPM }, { "ST2000DM008-2FR102", NULL, ATA_QUIRK_NOLPM }, From e6d7eba23b666d85cacee0643be280d6ce1ebffc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Mar 2026 12:48:01 +0900 Subject: [PATCH 8/8] ata: libata-scsi: report correct sense field pointer in ata_scsiop_maint_in() Commit 4ab7bb976343 ("ata: libata-scsi: Refactor ata_scsiop_maint_in()") modified ata_scsiop_maint_in() to directly call ata_scsi_set_invalid_field() to set the field pointer of the sense data of a failed MAINTENANCE IN command. However, in the case of an invalid command format, the sense data field incorrectly indicates byte 1 of the CDB. Fix this to indicate byte 2 of the command. Reported-by: Guenter Roeck Fixes: 4ab7bb976343 ("ata: libata-scsi: Refactor ata_scsiop_maint_in()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ad798e5246b4..3b65df914ebb 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3600,7 +3600,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_device *dev, if (cdb[2] != 1 && cdb[2] != 3) { ata_dev_warn(dev, "invalid command format %d\n", cdb[2]); - ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); + ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); return 0; }