mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 12:21:22 -05:00
scsi: sg: Fix occasional bogus elapsed time that exceeds timeout
A race condition was found in sg_proc_debug_helper(). It was observed on a system using an IBM LTO-9 SAS Tape Drive (ULTRIUM-TD9) and monitoring /proc/scsi/sg/debug every second. A very large elapsed time would sometimes appear. This is caused by two race conditions. We reproduced the issue with an IBM ULTRIUM-HH9 tape drive on an x86_64 architecture. A patched kernel was built, and the race condition could not be observed anymore after the application of this patch. A reproducer C program utilising the scsi_debug module was also built by Changhui Zhong and can be viewed here: https://github.com/MichaelRabek/linux-tests/blob/master/drivers/scsi/sg/sg_race_trigger.c The first race happens between the reading of hp->duration in sg_proc_debug_helper() and request completion in sg_rq_end_io(). The hp->duration member variable may hold either of two types of information: #1 - The start time of the request. This value is present while the request is not yet finished. #2 - The total execution time of the request (end_time - start_time). If sg_proc_debug_helper() executes *after* the value of hp->duration was changed from #1 to #2, but *before* srp->done is set to 1 in sg_rq_end_io(), a fresh timestamp is taken in the else branch, and the elapsed time (value type #2) is subtracted from a timestamp, which cannot yield a valid elapsed time (which is a type #2 value as well). To fix this issue, the value of hp->duration must change under the protection of the sfp->rq_list_lock in sg_rq_end_io(). Since sg_proc_debug_helper() takes this read lock, the change to srp->done and srp->header.duration will happen atomically from the perspective of sg_proc_debug_helper() and the race condition is thus eliminated. The second race condition happens between sg_proc_debug_helper() and sg_new_write(). Even though hp->duration is set to the current time stamp in sg_add_request() under the write lock's protection, it gets overwritten by a call to get_sg_io_hdr(), which calls copy_from_user() to copy struct sg_io_hdr from userspace into kernel space. hp->duration is set to the start time again in sg_common_write(). If sg_proc_debug_helper() is called between these two calls, an arbitrary value set by userspace (usually zero) is used to compute the elapsed time. To fix this issue, hp->duration must be set to the current timestamp again after get_sg_io_hdr() returns successfully. A small race window still exists between get_sg_io_hdr() and setting hp->duration, but this window is only a few instructions wide and does not result in observable issues in practice, as confirmed by testing. Additionally, we fix the format specifier from %d to %u for printing unsigned int values in sg_proc_debug_helper(). Signed-off-by: Michal Rábek <mrabek@redhat.com> Suggested-by: Tomas Henzl <thenzl@redhat.com> Tested-by: Changhui Zhong <czhong@redhat.com> Reviewed-by: Ewan D. Milne <emilne@redhat.com> Reviewed-by: John Meneghini <jmeneghi@redhat.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Link: https://patch.msgid.link/20251212160900.64924-1-mrabek@redhat.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
committed by
Martin K. Petersen
parent
d373163194
commit
0e16776542
@@ -731,6 +731,8 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
|
||||
sg_remove_request(sfp, srp);
|
||||
return -EFAULT;
|
||||
}
|
||||
hp->duration = jiffies_to_msecs(jiffies);
|
||||
|
||||
if (hp->interface_id != 'S') {
|
||||
sg_remove_request(sfp, srp);
|
||||
return -ENOSYS;
|
||||
@@ -815,7 +817,6 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
hp->duration = jiffies_to_msecs(jiffies);
|
||||
if (hp->interface_id != '\0' && /* v3 (or later) interface */
|
||||
(SG_FLAG_Q_AT_TAIL & hp->flags))
|
||||
at_head = 0;
|
||||
@@ -1338,9 +1339,6 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
|
||||
"sg_cmd_done: pack_id=%d, res=0x%x\n",
|
||||
srp->header.pack_id, result));
|
||||
srp->header.resid = resid;
|
||||
ms = jiffies_to_msecs(jiffies);
|
||||
srp->header.duration = (ms > srp->header.duration) ?
|
||||
(ms - srp->header.duration) : 0;
|
||||
if (0 != result) {
|
||||
struct scsi_sense_hdr sshdr;
|
||||
|
||||
@@ -1389,6 +1387,9 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
|
||||
done = 0;
|
||||
}
|
||||
srp->done = done;
|
||||
ms = jiffies_to_msecs(jiffies);
|
||||
srp->header.duration = (ms > srp->header.duration) ?
|
||||
(ms - srp->header.duration) : 0;
|
||||
write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
|
||||
|
||||
if (likely(done)) {
|
||||
@@ -2533,6 +2534,7 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp)
|
||||
const sg_io_hdr_t *hp;
|
||||
const char * cp;
|
||||
unsigned int ms;
|
||||
unsigned int duration;
|
||||
|
||||
k = 0;
|
||||
list_for_each_entry(fp, &sdp->sfds, sfd_siblings) {
|
||||
@@ -2570,13 +2572,17 @@ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp)
|
||||
seq_printf(s, " id=%d blen=%d",
|
||||
srp->header.pack_id, blen);
|
||||
if (srp->done)
|
||||
seq_printf(s, " dur=%d", hp->duration);
|
||||
seq_printf(s, " dur=%u", hp->duration);
|
||||
else {
|
||||
ms = jiffies_to_msecs(jiffies);
|
||||
seq_printf(s, " t_o/elap=%d/%d",
|
||||
duration = READ_ONCE(hp->duration);
|
||||
if (duration)
|
||||
duration = (ms > duration ?
|
||||
ms - duration : 0);
|
||||
seq_printf(s, " t_o/elap=%u/%u",
|
||||
(new_interface ? hp->timeout :
|
||||
jiffies_to_msecs(fp->timeout)),
|
||||
(ms > hp->duration ? ms - hp->duration : 0));
|
||||
duration);
|
||||
}
|
||||
seq_printf(s, "ms sgat=%d op=0x%02x\n", usg,
|
||||
(int) srp->data.cmd_opcode);
|
||||
|
||||
Reference in New Issue
Block a user