mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-01 01:14:19 -04:00
habanalabs: there is no kernel TDR in future ASICs
In future ASICs, there is no kernel TDR for new workloads that are submitted directly from user-space to the device. Therefore, the driver can NEVER know that a workload has timed out. So, when the user asks us to wait for an interrupt on the workload's completion and the wait times out, it doesn't mean the workload has timed out. It only means the wait has timed out, which is NOT an error from the driver's perspective.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
@@ -2932,11 +2932,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
rc = -EIO;
|
||||
*status = HL_WAIT_CS_STATUS_ABORTED;
|
||||
} else {
|
||||
dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
|
||||
interrupt->interrupt_id);
|
||||
rc = -ETIMEDOUT;
|
||||
/* The wait has timed-out. We don't know anything beyond that
|
||||
* because the workload wasn't submitted through the driver.
|
||||
* Therefore, from driver's perspective, the workload is still
|
||||
* executing.
|
||||
*/
|
||||
rc = 0;
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
}
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3049,6 +3052,12 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
|
||||
interrupt->interrupt_id);
|
||||
rc = -EINTR;
|
||||
} else {
|
||||
/* The wait has timed-out. We don't know anything beyond that
|
||||
* because the workload wasn't submitted through the driver.
|
||||
* Therefore, from driver's perspective, the workload is still
|
||||
* executing.
|
||||
*/
|
||||
rc = 0;
|
||||
*status = HL_WAIT_CS_STATUS_BUSY;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user