mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-07 09:14:07 -04:00
Merge tag 'drm-habanalabs-next-2025-09-25' of https://github.com/HabanaAI/drivers.accel.habanalabs.kernel into drm-next
This tag contains habanalabs driver changes for v6.18. It continues the previous upstream work from tags/drm-habanalabs-next-2024-06-23, including improvements in debug and visibility, alongside general code cleanups, and new features such as vmalloc-backed coherent mmap, HLDIO infrastructure, etc. Signed-off-by: Dave Airlie <airlied@redhat.com> From: "Elbaz, Koby" <koby.elbaz@intel.com> Link: https://lore.kernel.org/r/da02d370-9967-49d2-9eef-7aeaa40c987c@intel.com
This commit is contained in:
@@ -27,3 +27,26 @@ config DRM_ACCEL_HABANALABS
|
||||
|
||||
To compile this driver as a module, choose M here: the
|
||||
module will be called habanalabs.
|
||||
|
||||
if DRM_ACCEL_HABANALABS
|
||||
|
||||
config HL_HLDIO
|
||||
bool "Habanalabs NVMe Direct I/O (HLDIO)"
|
||||
depends on PCI_P2PDMA
|
||||
depends on BLOCK
|
||||
help
|
||||
Enable NVMe peer-to-peer direct I/O support for Habanalabs AI
|
||||
accelerators.
|
||||
|
||||
This allows direct data transfers between NVMe storage devices
|
||||
and Habanalabs accelerators without involving system memory,
|
||||
using PCI peer-to-peer DMA capabilities.
|
||||
|
||||
Requirements:
|
||||
- CONFIG_PCI_P2PDMA=y
|
||||
- NVMe device and Habanalabs accelerator under same PCI root complex
|
||||
- IOMMU disabled or in passthrough mode
|
||||
- Hardware supporting PCI P2P DMA
|
||||
|
||||
If unsure, say N
|
||||
endif # DRM_ACCEL_HABANALABS
|
||||
|
||||
@@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
|
||||
common/command_submission.o common/firmware_if.o \
|
||||
common/security.o common/state_dump.o \
|
||||
common/memory_mgr.o common/decoder.o
|
||||
|
||||
# Conditionally add HLDIO support
|
||||
ifdef CONFIG_HL_HLDIO
|
||||
HL_COMMON_FILES += common/hldio.o
|
||||
endif
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
#include "hldio.h"
|
||||
#include "../include/hw_ip/mmu/mmu_general.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
@@ -602,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
/* DIO debugfs functions following the standard pattern */
|
||||
static int dio_ssd2hl_show(struct seq_file *s, void *data)
|
||||
{
|
||||
struct hl_debugfs_entry *entry = s->private;
|
||||
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
|
||||
struct hl_device *hdev = dev_entry->hdev;
|
||||
|
||||
if (!hdev->asic_prop.supports_nvme) {
|
||||
seq_puts(s, "NVMe Direct I/O not supported\\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n");
|
||||
seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read);
|
||||
seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * dio_ssd2hl_write() - debugfs write handler triggering an SSD-to-device DMA read.
 * @file: debugfs file.
 * @buf: user buffer with "fd=N va=0xADDR off=N len=N".
 * @count: number of bytes written by the user.
 * @f_pos: unused file position.
 *
 * Parses the command, validates page alignment and size limits, performs the
 * transfer via hl_dio_ssd2hl() and updates the DIO statistics.
 * Fix: doubled newline escapes ("\\n") in log messages corrected to "\n".
 *
 * Return: @count on success, negative errno otherwise.
 */
static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	struct hl_ctx *ctx = hdev->kernel_ctx;
	char kbuf[128];
	u64 device_va = 0, off_bytes = 0, len_bytes = 0;
	u32 fd = 0;
	size_t len_read = 0;
	int rc, parsed;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	if (count >= sizeof(kbuf))
		return -EINVAL;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;

	kbuf[count] = 0;

	/* Parse: fd=N va=0xADDR off=N len=N */
	parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu",
			&fd, &device_va, &off_bytes, &len_bytes);
	if (parsed != 4) {
		dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\n");
		return -EINVAL;
	}

	/* Validate file descriptor */
	if (fd == 0) {
		dev_err(hdev->dev, "Invalid file descriptor: %u\n", fd);
		return -EINVAL;
	}

	/* Validate alignment requirements */
	if (!IS_ALIGNED(device_va, PAGE_SIZE) ||
	    !IS_ALIGNED(off_bytes, PAGE_SIZE) ||
	    !IS_ALIGNED(len_bytes, PAGE_SIZE)) {
		dev_err(hdev->dev,
			"All parameters must be page-aligned (4KB)\n");
		return -EINVAL;
	}

	/* Validate transfer size */
	if (len_bytes == 0 || len_bytes > SZ_1G) {
		dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\n",
			len_bytes);
		return -EINVAL;
	}

	dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\n",
		fd, device_va, off_bytes, len_bytes);

	rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read);
	if (rc < 0) {
		dev_entry->dio_stats.failed_ops++;
		dev_err(hdev->dev, "SSD2HL operation failed: %d\n", rc);
		return rc;
	}

	/* Update statistics */
	dev_entry->dio_stats.total_ops++;
	dev_entry->dio_stats.successful_ops++;
	dev_entry->dio_stats.bytes_transferred += len_read;
	dev_entry->dio_stats.last_len_read = len_read;

	dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\n", len_read);

	return count;
}
|
||||
|
||||
/*
 * dio_hl2ssd_show() - debugfs read handler for the dio_hl2ssd node.
 *
 * Device-to-SSD transfers are not implemented; report that to the reader.
 * Fix: doubled newline escape ("\\n") corrected to "\n".
 */
static int dio_hl2ssd_show(struct seq_file *s, void *data)
{
	seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\n");
	return 0;
}
|
||||
|
||||
/*
 * dio_hl2ssd_write() - debugfs write handler for the dio_hl2ssd node.
 *
 * Device-to-SSD transfers are not implemented yet; always returns -EOPNOTSUPP
 * (also when the ASIC lacks NVMe support).
 * Fix: doubled newline escape ("\\n") in the debug message corrected to "\n".
 */
static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	dev_dbg(hdev->dev, "HL2SSD operation not implemented\n");
	return -EOPNOTSUPP;
}
|
||||
|
||||
static int dio_stats_show(struct seq_file *s, void *data)
|
||||
{
|
||||
struct hl_debugfs_entry *entry = s->private;
|
||||
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
|
||||
struct hl_device *hdev = dev_entry->hdev;
|
||||
struct hl_dio_stats *stats = &dev_entry->dio_stats;
|
||||
u64 avg_bytes_per_op = 0, success_rate = 0;
|
||||
|
||||
if (!hdev->asic_prop.supports_nvme) {
|
||||
seq_puts(s, "NVMe Direct I/O not supported\\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (stats->successful_ops > 0)
|
||||
avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops;
|
||||
|
||||
if (stats->total_ops > 0)
|
||||
success_rate = (stats->successful_ops * 100) / stats->total_ops;
|
||||
|
||||
seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n");
|
||||
seq_printf(s, "Total operations: %llu\\n", stats->total_ops);
|
||||
seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops);
|
||||
seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops);
|
||||
seq_printf(s, "Success rate: %llu%%\\n", success_rate);
|
||||
seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred);
|
||||
seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op);
|
||||
seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * dio_reset_show() - debugfs read handler for the dio_reset node.
 *
 * Only prints usage instructions; the actual reset happens in dio_reset_write().
 * Fix: doubled newline escape ("\\n") corrected to "\n".
 */
static int dio_reset_show(struct seq_file *s, void *data)
{
	seq_puts(s, "Write '1' to reset DIO statistics\n");
	return 0;
}
|
||||
|
||||
/*
 * dio_reset_write() - debugfs write handler resetting the DIO statistics.
 * @file: debugfs file.
 * @buf: user buffer; must contain the value 1.
 * @count: number of bytes written by the user.
 * @f_pos: unused file position.
 *
 * Accepts only the value 1; zeroes the per-device DIO statistics.
 * Fix: doubled newline escapes ("\\n") in log messages corrected to "\n".
 *
 * Return: @count on success, negative errno otherwise.
 */
static ssize_t dio_reset_write(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	char kbuf[8];
	unsigned long val;
	int rc;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	if (count >= sizeof(kbuf))
		return -EINVAL;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;

	kbuf[count] = 0;

	rc = kstrtoul(kbuf, 0, &val);
	if (rc)
		return rc;

	if (val == 1) {
		memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
		dev_dbg(hdev->dev, "DIO statistics reset\n");
	} else {
		dev_err(hdev->dev, "Write '1' to reset statistics\n");
		return -EINVAL;
	}

	return count;
}
|
||||
#endif
|
||||
|
||||
static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
@@ -788,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_cfg_access_entry(struct hl_device *hdev,
|
||||
struct hl_debugfs_cfg_access_entry *entry)
|
||||
{
|
||||
char *access_type = "";
|
||||
struct tm tm;
|
||||
|
||||
switch (entry->debugfs_type) {
|
||||
case DEBUGFS_READ32:
|
||||
access_type = "READ32 from";
|
||||
break;
|
||||
case DEBUGFS_WRITE32:
|
||||
access_type = "WRITE32 to";
|
||||
break;
|
||||
case DEBUGFS_READ64:
|
||||
access_type = "READ64 from";
|
||||
break;
|
||||
case DEBUGFS_WRITE64:
|
||||
access_type = "WRITE64 to";
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type);
|
||||
return;
|
||||
}
|
||||
|
||||
time64_to_tm(entry->seconds_since_epoch, 0, &tm);
|
||||
dev_info(hdev->dev,
|
||||
"%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1,
|
||||
tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr);
|
||||
}
|
||||
|
||||
/*
 * hl_debugfs_cfg_access_history_dump() - print recent debugfs config-region accesses.
 * @hdev: habanalabs device structure.
 *
 * Walks the circular history buffer backwards starting from the newest entry,
 * printing every valid entry not older than HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC
 * seconds, and marks each printed entry as consumed.
 *
 * NOTE(review): the lock is dropped around dump_cfg_access_entry() (a stack
 * copy of the entry is printed) and re-taken afterwards; entries written
 * concurrently during that window may be skipped or end the walk early.
 * Presumably acceptable for a best-effort debug dump — confirm with the
 * original author.
 */
void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
{
	struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses;
	u32 i, head, count = 0;
	time64_t entry_time, now;
	unsigned long flags;

	now = ktime_get_real_seconds();

	spin_lock_irqsave(&dbgfs->lock, flags);
	head = dbgfs->head;
	/* head is the next free slot; step back (with wrap) to the newest entry */
	if (head == 0)
		i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
	else
		i = head - 1;

	/* Walk back until timeout or invalid entry */
	while (dbgfs->cfg_access_list[i].valid) {
		entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch;
		/* Stop when entry is older than timeout */
		if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC)
			break;

		/* print single entry under lock */
		{
			struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i];
			/*
			 * We copy the entry out under lock and then print after
			 * releasing the lock to minimize time under lock.
			 */
			spin_unlock_irqrestore(&dbgfs->lock, flags);
			dump_cfg_access_entry(hdev, &entry);
			spin_lock_irqsave(&dbgfs->lock, flags);
		}

		/* mark consumed */
		dbgfs->cfg_access_list[i].valid = false;

		/* step back one slot, wrapping at the start of the ring */
		if (i == 0)
			i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
		else
			i--;
		count++;
		/* visit each of the HL_DBGFS_CFG_ACCESS_HIST_LEN slots at most once */
		if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN)
			break;
	}
	spin_unlock_irqrestore(&dbgfs->lock, flags);
}
|
||||
|
||||
/*
 * check_if_cfg_access_and_log() - record a debugfs access targeting the config region.
 * @hdev: habanalabs device structure.
 * @addr: address requested by the debugfs operation.
 * @access_size: size of the access in bytes.
 * @access_type: the debugfs operation (READ32/WRITE32/READ64/WRITE64).
 *
 * If the access falls entirely inside the PCI config region, append it to the
 * circular access-history buffer under the history lock.
 */
static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size,
					enum debugfs_access_type access_type)
{
	struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses;
	struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG];
	struct hl_debugfs_cfg_access_entry *new_entry;
	unsigned long flags;

	/* Ignore accesses that do not fall entirely within the config region */
	if (addr < mem_reg->region_base ||
	    mem_reg->region_size < access_size ||
	    addr > mem_reg->region_base + mem_reg->region_size - access_size)
		return;

	spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags);

	/* Fill the slot at head, then advance head circularly */
	new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head];
	new_entry->seconds_since_epoch = ktime_get_real_seconds();
	new_entry->addr = addr;
	new_entry->debugfs_type = access_type;
	new_entry->valid = true;
	dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1)
					% HL_DBGFS_CFG_ACCESS_HIST_LEN;

	spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags);
}
|
||||
|
||||
static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
|
||||
enum debugfs_access_type acc_type)
|
||||
{
|
||||
@@ -805,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
|
||||
return rc;
|
||||
}
|
||||
|
||||
check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type);
|
||||
rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
@@ -1525,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = {
|
||||
{"mmu", mmu_show, mmu_asid_va_write},
|
||||
{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
|
||||
{"engines", engines_show, NULL},
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
/* DIO entries - only created if NVMe is supported */
|
||||
{"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write},
|
||||
{"dio_stats", dio_stats_show, NULL},
|
||||
{"dio_reset", dio_reset_show, dio_reset_write},
|
||||
{"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write},
|
||||
#endif
|
||||
};
|
||||
|
||||
static int hl_debugfs_open(struct inode *inode, struct file *file)
|
||||
@@ -1723,6 +2031,11 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
|
||||
&hdev->asic_prop.server_type);
|
||||
|
||||
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
|
||||
/* Skip DIO entries if NVMe is not supported */
|
||||
if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 &&
|
||||
!hdev->asic_prop.supports_nvme)
|
||||
continue;
|
||||
|
||||
debugfs_create_file(hl_debugfs_list[i].name,
|
||||
0644,
|
||||
root,
|
||||
@@ -1762,6 +2075,14 @@ int hl_debugfs_device_init(struct hl_device *hdev)
|
||||
spin_lock_init(&dev_entry->userptr_spinlock);
|
||||
mutex_init(&dev_entry->ctx_mem_hash_mutex);
|
||||
|
||||
spin_lock_init(&hdev->debugfs_cfg_accesses.lock);
|
||||
hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */
|
||||
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
/* Initialize DIO statistics */
|
||||
memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1780,6 +2101,7 @@ void hl_debugfs_device_fini(struct hl_device *hdev)
|
||||
vfree(entry->state_dump[i]);
|
||||
|
||||
kfree(entry->entry_arr);
|
||||
|
||||
}
|
||||
|
||||
void hl_debugfs_add_device(struct hl_device *hdev)
|
||||
@@ -1792,6 +2114,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
|
||||
|
||||
if (!hdev->asic_prop.fw_security_enabled)
|
||||
add_secured_nodes(dev_entry, dev_entry->root);
|
||||
|
||||
}
|
||||
|
||||
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
|
||||
@@ -1924,3 +2247,4 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
|
||||
up_write(&dev_entry->state_dump_sem);
|
||||
}
|
||||
|
||||
|
||||
@@ -1630,6 +1630,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
|
||||
from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
|
||||
reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;
|
||||
|
||||
if (hdev->cpld_shutdown) {
|
||||
dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
|
||||
dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
|
||||
return 0;
|
||||
@@ -2576,6 +2581,14 @@ void hl_device_fini(struct hl_device *hdev)
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
|
||||
|
||||
/* Reset the H/W (if it accessible). It will be in idle state after this returns */
|
||||
if (!hdev->cpld_shutdown) {
|
||||
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
if (rc)
|
||||
dev_err(hdev->dev,
|
||||
"hw_fini failed in device fini while removing device %d\n", rc);
|
||||
}
|
||||
|
||||
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
|
||||
|
||||
/* Release kernel context */
|
||||
@@ -2943,3 +2956,13 @@ void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *eve
|
||||
|
||||
mutex_unlock(&clk_throttle->lock);
|
||||
}
|
||||
|
||||
/*
 * hl_eq_cpld_shutdown_event_handle() - handle a CPLD shutdown event from the event queue.
 * @hdev: habanalabs device structure.
 * @event_id: firmware event id, forwarded to the critical-HW-error handler.
 * @event_mask: accumulator for user-notifier event bits.
 *
 * Records the critical HW error, marks the device unavailable to the user,
 * and flags the device as disabled and cpld-shutdown so no further H/W
 * accesses or resets are attempted (see the cpld_shutdown checks in
 * hl_device_reset()/hl_device_fini()).
 */
void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask)
{
	hl_handle_critical_hw_err(hdev, event_id, event_mask);
	*event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;

	/* Avoid any new accesses to the H/W */
	hdev->disabled = true;
	hdev->cpld_shutdown = true;
}
|
||||
|
||||
@@ -90,7 +90,9 @@ struct hl_fpriv;
|
||||
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
|
||||
#define HL_COMMON_DEC_INTERRUPT_ID 0xFFE
|
||||
|
||||
/* History depths for state dumps and debugfs config-access tracking.
 * Fix: HL_STATE_DUMP_HIST_LEN was defined twice; the duplicate is removed.
 */
#define HL_STATE_DUMP_HIST_LEN			5
#define HL_DBGFS_CFG_ACCESS_HIST_LEN		20
#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC	2 /* 2s */
|
||||
|
||||
/* Default value for device reset trigger , an invalid value */
|
||||
#define HL_RESET_TRIGGER_DEFAULT 0xFF
|
||||
@@ -702,6 +704,7 @@ struct hl_hints_range {
|
||||
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
|
||||
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
|
||||
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
|
||||
* @supports_nvme: indicates whether the asic supports NVMe P2P DMA.
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties *hw_queues_props;
|
||||
@@ -822,6 +825,7 @@ struct asic_fixed_properties {
|
||||
u8 supports_advanced_cpucp_rc;
|
||||
u8 supports_engine_modes;
|
||||
u8 support_dynamic_resereved_fw_size;
|
||||
u8 supports_nvme;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -2274,6 +2278,9 @@ struct hl_vm {
|
||||
u8 init_done;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
#include "hldio.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* DEBUG, PROFILING STRUCTURE
|
||||
@@ -2344,7 +2351,6 @@ struct hl_fpriv {
|
||||
struct mutex ctx_lock;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* DebugFS
|
||||
*/
|
||||
@@ -2372,6 +2378,7 @@ struct hl_debugfs_entry {
|
||||
struct hl_dbg_device_entry *dev_entry;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* struct hl_dbg_device_entry - ASIC specific debugfs manager.
|
||||
* @root: root dentry.
|
||||
@@ -2403,6 +2410,7 @@ struct hl_debugfs_entry {
|
||||
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
|
||||
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
|
||||
* @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read.
|
||||
* @dio_stats: Direct I/O statistics
|
||||
*/
|
||||
struct hl_dbg_device_entry {
|
||||
struct dentry *root;
|
||||
@@ -2434,6 +2442,35 @@ struct hl_dbg_device_entry {
|
||||
u8 i2c_addr;
|
||||
u8 i2c_reg;
|
||||
u8 i2c_len;
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
struct hl_dio_stats dio_stats;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of
|
||||
* hl_debugfs_cfg_access.
|
||||
* @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons.
|
||||
* @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64.
|
||||
* @addr: the requested address to access.
|
||||
* @valid: if set, this entry has valid data for dumping at interrupt time.
|
||||
*/
|
||||
struct hl_debugfs_cfg_access_entry {
|
||||
ktime_t seconds_since_epoch;
|
||||
enum debugfs_access_type debugfs_type;
|
||||
u64 addr;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
/**
 * struct hl_debugfs_cfg_access - saves debugfs config region access requests history.
 * @cfg_access_list: list of objects describing config region access requests.
 * @head: next valid index to add new entry to in cfg_access_list.
 * @lock: protects @head and the entries in @cfg_access_list.
 */
struct hl_debugfs_cfg_access {
	struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN];
	u32 head;
	spinlock_t lock; /* protects head and entries */
};
|
||||
|
||||
/**
|
||||
@@ -3281,6 +3318,7 @@ struct eq_heartbeat_debug_info {
|
||||
* @hl_chip_info: ASIC's sensors information.
|
||||
* @device_status_description: device status description.
|
||||
* @hl_debugfs: device's debugfs manager.
|
||||
* @debugfs_cfg_accesses: list of last debugfs config region accesses.
|
||||
* @cb_pool: list of pre allocated CBs.
|
||||
* @cb_pool_lock: protects the CB pool.
|
||||
* @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
|
||||
@@ -3305,6 +3343,7 @@ struct eq_heartbeat_debug_info {
|
||||
* @captured_err_info: holds information about errors.
|
||||
* @reset_info: holds current device reset information.
|
||||
* @heartbeat_debug_info: counters used to debug heartbeat failures.
|
||||
* @hldio: describes habanalabs direct storage interaction interface.
|
||||
* @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
|
||||
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
|
||||
* @fw_inner_major_ver: the major of current loaded preboot inner version.
|
||||
@@ -3357,6 +3396,7 @@ struct eq_heartbeat_debug_info {
|
||||
* addresses.
|
||||
* @is_in_dram_scrub: true if dram scrub operation is on going.
|
||||
* @disabled: is device disabled.
|
||||
* @cpld_shutdown: is cpld shutdown.
|
||||
* @late_init_done: is late init stage was done during initialization.
|
||||
* @hwmon_initialized: is H/W monitor sensors was initialized.
|
||||
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
|
||||
@@ -3461,6 +3501,7 @@ struct hl_device {
|
||||
struct hwmon_chip_info *hl_chip_info;
|
||||
|
||||
struct hl_dbg_device_entry hl_debugfs;
|
||||
struct hl_debugfs_cfg_access debugfs_cfg_accesses;
|
||||
|
||||
struct list_head cb_pool;
|
||||
spinlock_t cb_pool_lock;
|
||||
@@ -3496,7 +3537,9 @@ struct hl_device {
|
||||
struct hl_reset_info reset_info;
|
||||
|
||||
struct eq_heartbeat_debug_info heartbeat_debug_info;
|
||||
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
struct hl_dio hldio;
|
||||
#endif
|
||||
cpumask_t irq_affinity_mask;
|
||||
|
||||
u32 *stream_master_qid_arr;
|
||||
@@ -3532,6 +3575,7 @@ struct hl_device {
|
||||
u16 cpu_pci_msb_addr;
|
||||
u8 is_in_dram_scrub;
|
||||
u8 disabled;
|
||||
u8 cpld_shutdown;
|
||||
u8 late_init_done;
|
||||
u8 hwmon_initialized;
|
||||
u8 reset_on_lockup;
|
||||
@@ -4089,6 +4133,7 @@ void hl_init_cpu_for_irq(struct hl_device *hdev);
|
||||
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
|
||||
void hl_eq_heartbeat_event_handle(struct hl_device *hdev);
|
||||
void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask);
|
||||
void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
@@ -4110,6 +4155,7 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
unsigned long length);
|
||||
void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev);
|
||||
|
||||
#else
|
||||
|
||||
@@ -4185,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
|
||||
{
|
||||
}
|
||||
|
||||
static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Security */
|
||||
|
||||
@@ -961,6 +961,12 @@ static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *
|
||||
case HL_PASSTHROUGH_VERSIONS:
|
||||
need_input_buff = false;
|
||||
break;
|
||||
case HL_GET_ERR_COUNTERS_CMD:
|
||||
need_input_buff = true;
|
||||
break;
|
||||
case HL_GET_P_STATE:
|
||||
need_input_buff = false;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
437
drivers/accel/habanalabs/common/hldio.c
Normal file
437
drivers/accel/habanalabs/common/hldio.c
Normal file
@@ -0,0 +1,437 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2024 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "habanalabs.h"
|
||||
#include "hldio.h"
|
||||
#include <generated/uapi/linux/version.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
/*
|
||||
* NVMe Direct I/O implementation for habanalabs driver
|
||||
*
|
||||
* ASSUMPTIONS
|
||||
* ===========
|
||||
* 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless).
|
||||
* 2. Only READ operations (can extend in the future).
|
||||
* 3. No sparse files (can overcome this in the future).
|
||||
* 4. Kernel version >= 6.9
|
||||
 * 5. Requiring page alignment is OK (I don't see a solution to this one right
|
||||
* now, how do we read partial pages?)
|
||||
* 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel.
|
||||
* Theoretically I have a slight idea on how this could be solvable, but it
|
||||
 * is probably unacceptable for the upstream. Also may not work in the end.
|
||||
* 7. Either make sure our cards and disks are under the same PCI bridge, or
|
||||
* compile a custom kernel to hack around this.
|
||||
*/
|
||||
|
||||
#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */
|
||||
|
||||
/*
|
||||
* This struct contains all the useful data I could milk out of the file handle
|
||||
* provided by the user.
|
||||
* @TODO: right now it is retrieved on each IO, but can be done once with some
|
||||
* dedicated IOCTL, call it for example HL_REGISTER_HANDLE.
|
||||
*/
|
||||
struct hl_dio_fd {
|
||||
/* Back pointer in case we need it in async completion */
|
||||
struct hl_ctx *ctx;
|
||||
/* Associated fd struct */
|
||||
struct file *filp;
|
||||
};
|
||||
|
||||
/*
|
||||
* This is a single IO descriptor
|
||||
*/
|
||||
struct hl_direct_io {
|
||||
struct hl_dio_fd f;
|
||||
struct kiocb kio;
|
||||
struct bio_vec *bv;
|
||||
struct iov_iter iter;
|
||||
u64 device_va;
|
||||
u64 off_bytes;
|
||||
u64 len_bytes;
|
||||
u32 type;
|
||||
};
|
||||
|
||||
/*
 * hl_device_supports_nvme() - report whether the ASIC supports NVMe P2P direct I/O.
 * @hdev: habanalabs device structure.
 *
 * Return: true if the ASIC properties indicate NVMe P2P DMA support.
 */
bool hl_device_supports_nvme(struct hl_device *hdev)
{
	return hdev->asic_prop.supports_nvme;
}
|
||||
|
||||
static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct block_device *bd;
|
||||
struct super_block *sb;
|
||||
struct inode *inode;
|
||||
struct gendisk *gd;
|
||||
struct device *disk_dev;
|
||||
int rc;
|
||||
|
||||
f->filp = fget(fd);
|
||||
if (!f->filp) {
|
||||
rc = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(f->filp->f_flags & O_DIRECT)) {
|
||||
dev_err(hdev->dev, "file is not in the direct mode\n");
|
||||
rc = -EINVAL;
|
||||
goto fput;
|
||||
}
|
||||
|
||||
if (!f->filp->f_op->read_iter) {
|
||||
dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n");
|
||||
rc = -EINVAL;
|
||||
goto fput;
|
||||
}
|
||||
|
||||
inode = file_inode(f->filp);
|
||||
sb = inode->i_sb;
|
||||
bd = sb->s_bdev;
|
||||
gd = bd->bd_disk;
|
||||
|
||||
if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) {
|
||||
dev_err(hdev->dev, "sparse files are not currently supported\n");
|
||||
rc = -EINVAL;
|
||||
goto fput;
|
||||
}
|
||||
|
||||
if (!bd || !gd) {
|
||||
dev_err(hdev->dev, "invalid block device\n");
|
||||
rc = -ENODEV;
|
||||
goto fput;
|
||||
}
|
||||
/* Get the underlying device from the block device */
|
||||
disk_dev = disk_to_dev(gd);
|
||||
if (!dma_pci_p2pdma_supported(disk_dev)) {
|
||||
dev_err(hdev->dev, "device does not support PCI P2P DMA\n");
|
||||
rc = -EOPNOTSUPP;
|
||||
goto fput;
|
||||
}
|
||||
|
||||
/*
|
||||
* @TODO: Maybe we need additional checks here
|
||||
*/
|
||||
|
||||
f->ctx = ctx;
|
||||
rc = 0;
|
||||
|
||||
goto out;
|
||||
fput:
|
||||
fput(f->filp);
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * hl_dio_fd_unregister() - release the file reference taken by hl_dio_fd_register().
 * @f: file handle descriptor holding the referenced struct file.
 */
static void hl_dio_fd_unregister(struct hl_dio_fd *f)
{
	fput(f->filp);
}
|
||||
|
||||
static long hl_dio_count_io(struct hl_device *hdev)
|
||||
{
|
||||
s64 sum = 0;
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
sum += per_cpu(*hdev->hldio.inflight_ios, i);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*
 * hl_dio_get_iopath() - try to enter the IO path.
 * @ctx: context issuing the IO.
 *
 * Bumps the per-cpu inflight counter and takes a context reference if IO is
 * enabled. The enabled flag is re-checked after the increment to narrow the
 * race with a concurrent hl_dio_set_io_enabled(false) (see the in-line
 * comment below).
 *
 * Return: true if the iopath was acquired (caller must pair with
 * hl_dio_put_iopath()), false if IO is disabled.
 */
static bool hl_dio_get_iopath(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (hdev->hldio.io_enabled) {
		this_cpu_inc(*hdev->hldio.inflight_ios);

		/* Avoid race conditions */
		if (!hdev->hldio.io_enabled) {
			this_cpu_dec(*hdev->hldio.inflight_ios);
			return false;
		}

		hl_ctx_get(ctx);

		return true;
	}

	return false;
}
|
||||
|
||||
/*
 * hl_dio_put_iopath() - leave the IO path acquired by hl_dio_get_iopath().
 * @ctx: context whose reference is dropped.
 *
 * Drops the context reference, then decrements the per-cpu inflight counter.
 */
static void hl_dio_put_iopath(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	hl_ctx_put(ctx);
	this_cpu_dec(*hdev->hldio.inflight_ios);
}
|
||||
|
||||
/*
 * hl_dio_set_io_enabled() - enable or disable admission of new direct IOs.
 * @hdev: habanalabs device structure.
 * @enabled: new admission state; checked by hl_dio_get_iopath().
 */
static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled)
{
	hdev->hldio.io_enabled = enabled;
}
|
||||
|
||||
static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io)
|
||||
{
|
||||
if ((u64)io->device_va & ~PAGE_MASK) {
|
||||
dev_dbg(hdev->dev, "device address must be 4K aligned\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (io->len_bytes & ~PAGE_MASK) {
|
||||
dev_dbg(hdev->dev, "IO length must be 4K aligned\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (io->off_bytes & ~PAGE_MASK) {
|
||||
dev_dbg(hdev->dev, "IO offset must be 4K aligned\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * hl_dio_va2page() - translate a device VA to the page struct of its P2P region.
 * @hdev: habanalabs device structure.
 * @ctx: context used for the MMU translation.
 * @device_va: device virtual address (page aligned).
 *
 * Translates the VA to a physical address via the device MMU and looks it up
 * in the registered P2P regions.
 *
 * Return: the matching page struct, or NULL on translation failure or if the
 * physical address is outside every P2P region.
 */
static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va)
{
	struct hl_dio *hldio = &hdev->hldio;
	u64 pa;
	int rc, idx;

	rc = hl_mmu_va_to_pa(ctx, device_va, &pa);
	if (rc) {
		dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)",
			device_va, rc);
		return NULL;
	}

	for (idx = 0 ; idx < hldio->np2prs ; ++idx) {
		u64 base = hldio->p2prs[idx].device_pa;

		if (pa < base || pa >= base + hldio->p2prs[idx].size)
			continue;

		return hldio->p2prs[idx].p2ppages[(pa - base) >> PAGE_SHIFT];
	}

	return NULL;
}
|
||||
|
||||
static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io)
|
||||
{
|
||||
u64 npages, device_va;
|
||||
ssize_t rc;
|
||||
int i;
|
||||
|
||||
if (!hl_dio_validate_io(hdev, io))
|
||||
return -EINVAL;
|
||||
|
||||
if (!hl_dio_get_iopath(io->f.ctx)) {
|
||||
dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n");
|
||||
return -ESHUTDOWN;
|
||||
}
|
||||
|
||||
init_sync_kiocb(&io->kio, io->f.filp);
|
||||
io->kio.ki_pos = io->off_bytes;
|
||||
|
||||
npages = (io->len_bytes >> PAGE_SHIFT);
|
||||
|
||||
/* @TODO: this can be implemented smarter, vmalloc in iopath is not
|
||||
* ideal. Maybe some variation of genpool. Number of pages may differ
|
||||
* greatly, so maybe even use pools of different sizes and chose the
|
||||
* closest one.
|
||||
*/
|
||||
io->bv = vzalloc(npages * sizeof(struct bio_vec));
|
||||
if (!io->bv)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) {
|
||||
io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va);
|
||||
if (!io->bv[i].bv_page) {
|
||||
dev_err(hdev->dev, "error getting page struct for device va %#llx",
|
||||
device_va);
|
||||
rc = -EFAULT;
|
||||
goto cleanup;
|
||||
}
|
||||
io->bv[i].bv_offset = 0;
|
||||
io->bv[i].bv_len = PAGE_SIZE;
|
||||
}
|
||||
|
||||
iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes);
|
||||
if (io->f.filp->f_op && io->f.filp->f_op->read_iter)
|
||||
rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter);
|
||||
else
|
||||
rc = -EINVAL;
|
||||
|
||||
cleanup:
|
||||
vfree(io->bv);
|
||||
hl_dio_put_iopath(io->f.ctx);
|
||||
|
||||
dev_dbg(hdev->dev, "IO ended with %ld\n", rc);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * @TODO: This function can be used as a callback for io completion under
 * kio->ki_complete in order to implement async IO.
 * Note that on more recent kernels there is no ret2.
 */
__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2)
{
	/* Recover the enclosing IO descriptor from the embedded kiocb */
	struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio);

	dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret);

	/* Do something to copy result to user / notify completion */

	/* Release the iopath reference and the registered fd taken at submit */
	hl_dio_put_iopath(io->f.ctx);

	hl_dio_fd_unregister(&io->f);
}
|
||||
|
||||
/*
|
||||
* DMA disk to ASIC, wait for results. Must be invoked from the user context
|
||||
*/
|
||||
int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
|
||||
u64 device_va, off_t off_bytes, size_t len_bytes,
|
||||
size_t *len_read)
|
||||
{
|
||||
struct hl_direct_io *io;
|
||||
ssize_t rc;
|
||||
|
||||
dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes);
|
||||
|
||||
io = kzalloc(sizeof(*io), GFP_KERNEL);
|
||||
if (!io) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*io = (struct hl_direct_io){
|
||||
.device_va = device_va,
|
||||
.len_bytes = len_bytes,
|
||||
.off_bytes = off_bytes,
|
||||
.type = READ,
|
||||
};
|
||||
|
||||
rc = hl_dio_fd_register(ctx, fd, &io->f);
|
||||
if (rc)
|
||||
goto kfree_io;
|
||||
|
||||
rc = hl_direct_io(hdev, io);
|
||||
if (rc >= 0) {
|
||||
*len_read = rc;
|
||||
rc = 0;
|
||||
}
|
||||
|
||||
/* This shall be called only in the case of a sync IO */
|
||||
hl_dio_fd_unregister(&io->f);
|
||||
kfree_io:
|
||||
kfree(io);
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr)
|
||||
{
|
||||
if (p2pr->p2ppages) {
|
||||
vfree(p2pr->p2ppages);
|
||||
p2pr->p2ppages = NULL;
|
||||
}
|
||||
|
||||
if (p2pr->p2pmem) {
|
||||
dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n",
|
||||
p2pr->p2pmem, p2pr->size);
|
||||
pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size);
|
||||
p2pr->p2pmem = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void hl_p2p_region_fini_all(struct hl_device *hdev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < hdev->hldio.np2prs ; ++i)
|
||||
hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]);
|
||||
|
||||
kvfree(hdev->hldio.p2prs);
|
||||
hdev->hldio.p2prs = NULL;
|
||||
hdev->hldio.np2prs = 0;
|
||||
}
|
||||
|
||||
int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
|
||||
{
|
||||
void *addr;
|
||||
int rc, i;
|
||||
|
||||
/* Start by publishing our p2p memory */
|
||||
rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "error adding p2p resource: %d\n", rc);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Alloc all p2p mem */
|
||||
p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size);
|
||||
if (!p2pr->p2pmem) {
|
||||
dev_err(hdev->dev, "error allocating p2p memory\n");
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *));
|
||||
if (!p2pr->p2ppages) {
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) {
|
||||
p2pr->p2ppages[i] = virt_to_page(addr);
|
||||
if (!p2pr->p2ppages[i]) {
|
||||
rc = -EFAULT;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
hl_p2p_region_fini(hdev, p2pr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * hl_dio_start() - bring up the direct-IO machinery for a device.
 *
 * Allocates the per-cpu inflight-IO counter and marks IO as enabled.
 *
 * Return: 0 on success, -ENOMEM if the counter cannot be allocated.
 */
int hl_dio_start(struct hl_device *hdev)
{
	dev_dbg(hdev->dev, "initializing HLDIO\n");

	/* Initialize the IO counter and enable IO */
	hdev->hldio.inflight_ios = alloc_percpu(s64);
	if (!hdev->hldio.inflight_ios)
		return -ENOMEM;

	hl_dio_set_io_enabled(hdev, true);

	return 0;
}
|
||||
|
||||
/*
 * hl_dio_stop() - disable direct IO and release its resources.
 *
 * Ordering matters: new submissions are refused first (io_enabled
 * cleared), then the inflight counter is polled until it drains or
 * IO_STABILIZE_TIMEOUT expires, and only then is the per-cpu counter
 * freed. Safe to call when hl_dio_start() failed or never ran.
 */
void hl_dio_stop(struct hl_device *hdev)
{
	dev_dbg(hdev->dev, "deinitializing HLDIO\n");

	if (hdev->hldio.io_enabled) {
		/* Wait for all the IO to finish */
		hl_dio_set_io_enabled(hdev, false);
		hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT);
	}

	if (hdev->hldio.inflight_ios) {
		free_percpu(hdev->hldio.inflight_ios);
		hdev->hldio.inflight_ios = NULL;
	}
}
|
||||
146
drivers/accel/habanalabs/common/hldio.h
Normal file
146
drivers/accel/habanalabs/common/hldio.h
Normal file
@@ -0,0 +1,146 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver
|
||||
*
|
||||
* This feature requires specific hardware setup and must not be built
|
||||
* under COMPILE_TEST.
|
||||
*/
|
||||
|
||||
#ifndef __HL_HLDIO_H__
|
||||
#define __HL_HLDIO_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/ktime.h> /* ktime functions */
|
||||
#include <linux/delay.h> /* usleep_range */
|
||||
#include <linux/kernel.h> /* might_sleep_if */
|
||||
#include <linux/errno.h> /* error codes */
|
||||
|
||||
/* Forward declarations */
|
||||
struct hl_device;
|
||||
struct file;
|
||||
|
||||
/* Enable only if Kconfig selected */
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
/**
|
||||
* struct hl_p2p_region - describes a single P2P memory region
|
||||
* @p2ppages: array of page structs for the P2P memory
|
||||
* @p2pmem: virtual address of the P2P memory region
|
||||
* @device_pa: physical address on the device
|
||||
* @bar_offset: offset within the BAR
|
||||
* @size: size of the region in bytes
|
||||
* @bar: BAR number containing this region
|
||||
*/
|
||||
struct hl_p2p_region {
|
||||
struct page **p2ppages;
|
||||
void *p2pmem;
|
||||
u64 device_pa;
|
||||
u64 bar_offset;
|
||||
u64 size;
|
||||
int bar;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_dio_stats - Direct I/O statistics
|
||||
* @total_ops: total number of operations attempted
|
||||
* @successful_ops: number of successful operations
|
||||
* @failed_ops: number of failed operations
|
||||
* @bytes_transferred: total bytes successfully transferred
|
||||
* @last_len_read: length of the last read operation
|
||||
*/
|
||||
struct hl_dio_stats {
|
||||
u64 total_ops;
|
||||
u64 successful_ops;
|
||||
u64 failed_ops;
|
||||
u64 bytes_transferred;
|
||||
size_t last_len_read;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_dio - describes habanalabs direct storage interaction interface
|
||||
* @p2prs: array of p2p regions
|
||||
* @inflight_ios: percpu counter for inflight ios
|
||||
* @np2prs: number of elements in p2prs
|
||||
* @io_enabled: 1 if io is enabled 0 otherwise
|
||||
*/
|
||||
struct hl_dio {
|
||||
struct hl_p2p_region *p2prs;
|
||||
s64 __percpu *inflight_ios;
|
||||
u8 np2prs;
|
||||
u8 io_enabled;
|
||||
};
|
||||
|
||||
int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
|
||||
u64 device_va, off_t off_bytes, size_t len_bytes,
|
||||
size_t *len_read);
|
||||
void hl_p2p_region_fini_all(struct hl_device *hdev);
|
||||
int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr);
|
||||
int hl_dio_start(struct hl_device *hdev);
|
||||
void hl_dio_stop(struct hl_device *hdev);
|
||||
|
||||
/* Init/teardown */
|
||||
int hl_hldio_init(struct hl_device *hdev);
|
||||
void hl_hldio_fini(struct hl_device *hdev);
|
||||
|
||||
/* File operations */
|
||||
long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
|
||||
|
||||
/* DebugFS hooks */
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
void hl_hldio_debugfs_init(struct hl_device *hdev);
|
||||
void hl_hldio_debugfs_fini(struct hl_device *hdev);
|
||||
#else
|
||||
static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
|
||||
static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
|
||||
#endif
|
||||
|
||||
#else /* !CONFIG_HL_HLDIO */
|
||||
|
||||
struct hl_p2p_region;
|
||||
/* Stubs when HLDIO is disabled */
|
||||
static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
|
||||
u64 device_va, off_t off_bytes, size_t len_bytes,
|
||||
size_t *len_read)
|
||||
{ return -EOPNOTSUPP; }
|
||||
static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {}
|
||||
static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
|
||||
{ return -EOPNOTSUPP; }
|
||||
static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; }
|
||||
static inline void hl_dio_stop(struct hl_device *hdev) {}
|
||||
|
||||
static inline int hl_hldio_init(struct hl_device *hdev) { return 0; }
|
||||
static inline void hl_hldio_fini(struct hl_device *hdev) { }
|
||||
static inline long hl_hldio_ioctl(struct file *f, unsigned int c,
|
||||
unsigned long a)
|
||||
{ return -ENOTTY; }
|
||||
static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
|
||||
static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
|
||||
|
||||
#endif /* CONFIG_HL_HLDIO */
|
||||
|
||||
/* Simplified polling macro for HLDIO (no simulator support) */
|
||||
#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \
|
||||
({ \
|
||||
ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
|
||||
might_sleep_if(sleep_us); \
|
||||
(void)(hdev); /* keep signature consistent, hdev unused */ \
|
||||
for (;;) { \
|
||||
mb(); /* ensure ordering of memory operations */ \
|
||||
if (cond) \
|
||||
break; \
|
||||
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
|
||||
break; \
|
||||
if (sleep_us) \
|
||||
usleep_range((sleep_us >> 2) + 1, sleep_us); \
|
||||
} \
|
||||
(cond) ? 0 : -ETIMEDOUT; \
|
||||
})
|
||||
|
||||
#ifdef CONFIG_HL_HLDIO
|
||||
bool hl_device_supports_nvme(struct hl_device *hdev);
|
||||
#else
|
||||
static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; }
|
||||
#endif
|
||||
|
||||
#endif /* __HL_HLDIO_H__ */
|
||||
@@ -1837,7 +1837,12 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
|
||||
atomic_dec(&ctx->hdev->dmabuf_export_cnt);
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
/* Paired with get_file() in export_dmabuf() */
|
||||
/*
|
||||
* Paired with get_file() in export_dmabuf().
|
||||
* 'ctx' can be still used here to get the file pointer, even after hl_ctx_put() was called,
|
||||
* because releasing the compute device file involves another reference decrement, and it
|
||||
* would be possible only after calling fput().
|
||||
*/
|
||||
fput(ctx->hpriv->file_priv->filp);
|
||||
|
||||
kfree(hl_dmabuf);
|
||||
@@ -2332,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
|
||||
if (rc < 0)
|
||||
goto destroy_pages;
|
||||
npages = rc;
|
||||
rc = -EFAULT;
|
||||
rc = -ENOMEM;
|
||||
goto put_pages;
|
||||
}
|
||||
userptr->npages = npages;
|
||||
|
||||
@@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
|
||||
goto put_mem;
|
||||
}
|
||||
|
||||
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
|
||||
if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
|
||||
user_mem_size)) {
|
||||
#else
|
||||
if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
|
||||
user_mem_size)) {
|
||||
#endif
|
||||
dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
|
||||
buf->behavior->topic, vma->vm_start);
|
||||
|
||||
|
||||
@@ -96,14 +96,21 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
|
||||
infineon_second_stage_third_instance =
|
||||
(infineon_second_stage_version >> 16) & mask;
|
||||
|
||||
if (cpucp_info->infineon_second_stage_version)
|
||||
if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version)
|
||||
return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
|
||||
le32_to_cpu(cpucp_info->infineon_version),
|
||||
infineon_second_stage_first_instance,
|
||||
infineon_second_stage_second_instance,
|
||||
infineon_second_stage_third_instance);
|
||||
else
|
||||
else if (cpucp_info->infineon_second_stage_version)
|
||||
return sprintf(buf, "%#04x:%#04x:%#04x\n",
|
||||
infineon_second_stage_first_instance,
|
||||
infineon_second_stage_second_instance,
|
||||
infineon_second_stage_third_instance);
|
||||
else if (cpucp_info->infineon_version)
|
||||
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(vrm_ver);
|
||||
|
||||
@@ -4168,10 +4168,29 @@ static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
|
||||
VM_DONTCOPY | VM_NORESERVE);
|
||||
|
||||
#ifdef _HAS_DMA_MMAP_COHERENT
|
||||
/*
|
||||
* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
|
||||
* so vm_insert_page() can handle it safely. Without this, the kernel
|
||||
* may BUG_ON due to VM_PFNMAP.
|
||||
*/
|
||||
if (is_vmalloc_addr(cpu_addr))
|
||||
vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
|
||||
(dma_addr - HOST_PHYS_BASE), size);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
|
||||
#else
|
||||
|
||||
rc = remap_pfn_range(vma, vma->vm_start,
|
||||
virt_to_phys(cpu_addr) >> PAGE_SHIFT,
|
||||
size, vma->vm_page_prot);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "remap_pfn_range error %d", rc);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -728,6 +728,354 @@ static const int gaudi2_dma_core_async_event_id[] = {
|
||||
[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
|
||||
};
|
||||
|
||||
const char *gaudi2_engine_id_str[] = {
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_MME),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_MME),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0),
|
||||
__stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_MME),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0),
|
||||
__stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_MME),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0),
|
||||
__stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1),
|
||||
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6),
|
||||
__stringify(GAUDI2_ENGINE_ID_PDMA_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_PDMA_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_ROT_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_ROT_1),
|
||||
__stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0),
|
||||
__stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC0_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC0_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC1_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC1_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC2_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC2_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC3_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC3_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC4_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC4_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC5_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC5_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC6_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC6_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC7_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC7_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC8_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC8_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC9_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC9_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC10_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC10_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC11_0),
|
||||
__stringify(GAUDI2_ENGINE_ID_NIC11_1),
|
||||
__stringify(GAUDI2_ENGINE_ID_PCIE),
|
||||
__stringify(GAUDI2_ENGINE_ID_PSOC),
|
||||
__stringify(GAUDI2_ENGINE_ID_ARC_FARM),
|
||||
__stringify(GAUDI2_ENGINE_ID_KDMA),
|
||||
__stringify(GAUDI2_ENGINE_ID_SIZE),
|
||||
};
|
||||
|
||||
const char *gaudi2_queue_id_str[] = {
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_PDMA_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_2_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_2_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_2_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_2_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_3_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_3_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_3_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_3_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_4_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_4_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_4_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_4_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_5_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_5_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_5_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_5_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_6_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_6_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_6_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_6_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_7_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_7_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_7_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_7_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_8_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_8_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_8_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_8_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_9_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_9_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_9_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_9_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_10_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_10_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_10_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_10_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_11_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_11_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_11_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_11_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_12_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_12_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_12_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_12_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_13_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_13_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_13_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_13_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_14_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_14_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_14_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_14_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_15_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_15_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_15_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_15_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_16_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_16_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_16_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_16_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_17_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_17_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_17_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_17_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_18_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_18_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_18_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_18_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_19_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_19_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_19_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_19_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_20_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_20_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_20_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_20_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_21_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_21_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_21_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_21_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_22_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_22_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_22_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_22_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_23_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_23_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_23_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_NIC_23_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_0_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_0_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_0_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_0_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_1_0),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_1_1),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_1_2),
|
||||
__stringify(GAUDI2_QUEUE_ID_ROT_1_3),
|
||||
__stringify(GAUDI2_QUEUE_ID_CPU_PQ),
|
||||
__stringify(GAUDI2_QUEUE_ID_SIZE),
|
||||
};
|
||||
|
||||
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
|
||||
"qman sei intr",
|
||||
"arc sei intr"
|
||||
@@ -3150,7 +3498,6 @@ static int gaudi2_early_init(struct hl_device *hdev)
|
||||
rc = hl_fw_read_preboot_status(hdev);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
/* we are already on failure flow, so don't check if hw_fini fails. */
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
goto pci_fini;
|
||||
}
|
||||
@@ -3162,6 +3509,13 @@ static int gaudi2_early_init(struct hl_device *hdev)
|
||||
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
|
||||
goto pci_fini;
|
||||
}
|
||||
|
||||
rc = hl_fw_read_preboot_status(hdev);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
goto pci_fini;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -4836,7 +5190,7 @@ static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw
|
||||
else
|
||||
wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
|
||||
|
||||
if (fw_reset)
|
||||
if (fw_reset || hdev->cpld_shutdown)
|
||||
goto skip_engines;
|
||||
|
||||
gaudi2_stop_dma_qmans(hdev);
|
||||
@@ -6484,6 +6838,13 @@ static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
VM_DONTCOPY | VM_NORESERVE);
|
||||
|
||||
#ifdef _HAS_DMA_MMAP_COHERENT
|
||||
/*
|
||||
* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
|
||||
* so vm_insert_page() can handle it safely. Without this, the kernel
|
||||
* may BUG_ON due to VM_PFNMAP.
|
||||
*/
|
||||
if (is_vmalloc_addr(cpu_addr))
|
||||
vm_flags_set(vma, VM_MIXEDMAP);
|
||||
|
||||
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
|
||||
if (rc)
|
||||
@@ -6774,7 +7135,8 @@ static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parse
|
||||
struct gaudi2_device *gaudi2 = hdev->asic_specific;
|
||||
|
||||
if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
|
||||
dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
|
||||
dev_err(hdev->dev, "h/w queue %s is disabled\n",
|
||||
GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -7026,7 +7388,8 @@ static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue
|
||||
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
|
||||
if (rc)
|
||||
dev_err(hdev->dev,
|
||||
"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
|
||||
"Failed to send msg_short packet to H/W queue %s\n",
|
||||
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@@ -7052,8 +7415,8 @@ static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queu
|
||||
timeout_usec);
|
||||
|
||||
if (rc == -ETIMEDOUT) {
|
||||
dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
|
||||
hw_queue_id, tmp);
|
||||
dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n",
|
||||
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp);
|
||||
rc = -EIO;
|
||||
}
|
||||
|
||||
@@ -9603,8 +9966,8 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
|
||||
q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
|
||||
|
||||
gaudi2_print_event(hdev, event_type, true,
|
||||
"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
|
||||
engine_id, intr_type, q->queue_index);
|
||||
"ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u",
|
||||
GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index);
|
||||
return 1;
|
||||
default:
|
||||
gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
|
||||
@@ -10172,7 +10535,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
|
||||
le64_to_cpu(eq_entry->data[0]));
|
||||
error_count = GAUDI2_NA_EVENT_CAUSE;
|
||||
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
|
||||
hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask);
|
||||
break;
|
||||
|
||||
case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
|
||||
@@ -10260,6 +10623,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
|
||||
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
|
||||
|
||||
hl_debugfs_cfg_access_history_dump(hdev);
|
||||
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
|
||||
hl_device_cond_reset(hdev, reset_flags, event_mask);
|
||||
}
|
||||
@@ -10296,8 +10660,8 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
|
||||
|
||||
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
|
||||
hw_queue_id);
|
||||
dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n",
|
||||
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -240,6 +240,15 @@
|
||||
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
|
||||
|
||||
|
||||
extern const char *gaudi2_engine_id_str[];
|
||||
extern const char *gaudi2_queue_id_str[];
|
||||
|
||||
#define GAUDI2_ENG_ID_TO_STR(initiator) ((initiator) >= GAUDI2_ENGINE_ID_SIZE ? "not found" : \
|
||||
gaudi2_engine_id_str[initiator])
|
||||
|
||||
#define GAUDI2_QUEUE_ID_TO_STR(initiator) ((initiator) >= GAUDI2_QUEUE_ID_SIZE ? "not found" : \
|
||||
gaudi2_queue_id_str[initiator])
|
||||
|
||||
enum gaudi2_reserved_sob_id {
|
||||
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
|
||||
GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
|
||||
|
||||
@@ -2426,7 +2426,7 @@ static int gaudi2_config_bmon(struct hl_device *hdev, struct hl_debug_params *pa
|
||||
WREG32(base_reg + mmBMON_ADDRH_E3_OFFSET, 0);
|
||||
WREG32(base_reg + mmBMON_REDUCTION_OFFSET, 0);
|
||||
WREG32(base_reg + mmBMON_STM_TRC_OFFSET, 0x7 | (0xA << 8));
|
||||
WREG32(base_reg + mmBMON_CR_OFFSET, 0x77 | 0xf << 24);
|
||||
WREG32(base_reg + mmBMON_CR_OFFSET, 0x41);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1425,9 +1425,13 @@ struct cpucp_monitor_dump {
|
||||
* from "pkt_subidx" field in struct cpucp_packet.
|
||||
*
|
||||
* HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions.
|
||||
* HL_GET_ERR_COUNTERS_CMD - Command to get error counters
|
||||
* HL_GET_P_STATE - get performance state
|
||||
*/
|
||||
enum hl_passthrough_type {
|
||||
HL_PASSTHROUGH_VERSIONS,
|
||||
HL_GET_ERR_COUNTERS_CMD,
|
||||
HL_GET_P_STATE,
|
||||
};
|
||||
|
||||
#endif /* CPUCP_IF_H */
|
||||
|
||||
@@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(habanalabs_comms_template,
|
||||
__entry->op_str = op_str;
|
||||
),
|
||||
|
||||
TP_printk("%s: cms: %s",
|
||||
TP_printk("%s: cmd: %s",
|
||||
__get_str(dname),
|
||||
__entry->op_str)
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user