diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f7a786dba9bc..06aff84ad8f7 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -61,6 +61,18 @@ /* MMU */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +/** + * enum hl_mmu_page_table_locaion - mmu page table location + * @MMU_DR_PGT: page-table is located on device DRAM. + * @MMU_HR_PGT: page-table is located on host memory. + * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported. + */ +enum hl_mmu_page_table_location { + MMU_DR_PGT = 0, /* device-dram-resident MMU PGT */ + MMU_HR_PGT, /* host resident MMU PGT */ + MMU_NUM_PGT_LOCATIONS /* num of PGT locations */ +}; + /* * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream @@ -303,6 +315,8 @@ enum hl_device_hw_state { * @hop5_mask: mask to get the PTE address in hop 5. * @page_size: default page size used to allocate memory. * @num_hops: The amount of hops supported by the translation table. + * @host_resident: Should the MMU page table reside in host memory or in the + * device DRAM. */ struct hl_mmu_properties { u64 start_addr; @@ -321,6 +335,7 @@ struct hl_mmu_properties { u64 hop5_mask; u32 page_size; u32 num_hops; + u8 host_resident; }; /** @@ -1572,17 +1587,51 @@ struct hl_device_idle_busy_ts { ktime_t busy_to_idle_ts; }; +/** + * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop + * information. + * @virt_addr: the virtual address of the hop. + * @phys-addr: the physical address of the hop (used by the device-mmu). + * @shadow_addr: The shadow of the hop used by the driver for walking the hops. + */ +struct hr_mmu_hop_addrs { + u64 virt_addr; + u64 phys_addr; + u64 shadow_addr; +}; /** - * struct hl_mmu_priv - used for holding per-device mmu internal information. + * struct hl_mmu_hr_pgt_priv - used for holding per-device mmu host-resident + * page-table internal information. * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops. * @mmu_shadow_hop0: shadow array of hop0 tables. */ -struct hl_mmu_priv { +struct hl_mmu_hr_priv { + struct gen_pool *mmu_pgt_pool; + struct hr_mmu_hop_addrs *mmu_shadow_hop0; +}; + +/** + * struct hl_mmu_dr_pgt_priv - used for holding per-device mmu device-resident + * page-table internal information. + * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops. + * @mmu_shadow_hop0: shadow array of hop0 tables. + */ +struct hl_mmu_dr_priv { struct gen_pool *mmu_pgt_pool; void *mmu_shadow_hop0; }; +/** + * struct hl_mmu_priv - used for holding per-device mmu internal information. + * @dr: information on the device-resident MMU, when exists. + * @hr: information on the host-resident MMU, when exists. + */ +struct hl_mmu_priv { + struct hl_mmu_dr_priv dr; + struct hl_mmu_hr_priv hr; +}; + /** * struct hl_mmu_funcs - Device related MMU functions. * @init: initialize the MMU module. @@ -1779,7 +1828,7 @@ struct hl_device { struct hl_cs_counters_atomic aggregated_cs_counters; struct hl_mmu_priv mmu_priv; - struct hl_mmu_funcs mmu_func; + struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; atomic64_t dram_used_mem; u64 timeout_jiffies; @@ -2042,7 +2091,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); -void hl_mmu_v1_set_funcs(struct hl_device *hdev); +void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index 451148959431..6f535c81478d 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -22,18 +22,25 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) * hl_mmu_init() - initialize the MMU module. * @hdev: habanalabs device structure. * - * This function does the following: - * - Create a pool of pages for pgt_infos. - * - Create a shadow table for pgt - * * Return: 0 for success, non-zero for failure. */ int hl_mmu_init(struct hl_device *hdev) { - if (hdev->mmu_enable) - return hdev->mmu_func.init(hdev); + int rc = -EOPNOTSUPP; - return 0; + if (!hdev->mmu_enable) + return 0; + + if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { + rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); + if (rc) + return rc; + } + + if (hdev->mmu_func[MMU_HR_PGT].init != NULL) + rc = hdev->mmu_func[MMU_HR_PGT].init(hdev); + + return rc; } /** @@ -48,8 +55,14 @@ int hl_mmu_init(struct hl_device *hdev) */ void hl_mmu_fini(struct hl_device *hdev) { - if (hdev->mmu_enable) - hdev->mmu_func.fini(hdev); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) + hdev->mmu_func[MMU_DR_PGT].fini(hdev); + + if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) + hdev->mmu_func[MMU_HR_PGT].fini(hdev); } /** @@ -63,11 +76,21 @@ void hl_mmu_fini(struct hl_device *hdev) int hl_mmu_ctx_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; + int rc = -EOPNOTSUPP; - if (hdev->mmu_enable) - return hdev->mmu_func.ctx_init(ctx); + if (!hdev->mmu_enable) + return 0; - return 0; + if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { + rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); + if (rc) + return rc; + } + + if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL) + rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx); + + return rc; } /* @@ -84,8 +107,14 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.ctx_fini(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL) + hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) + hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); } /* @@ -117,7 +146,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; - int i, rc = 0; + int i, rc = 0, pgt_residency; bool is_dram_addr; if (!hdev->mmu_enable) @@ -132,6 +161,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, else mmu_prop = &prop->pmmu; + pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; + /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and unmap them separately. @@ -150,7 +181,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr); + rc = hdev->mmu_func[pgt_residency].unmap(ctx, + real_virt_addr, is_dram_addr); if (rc) break; @@ -158,7 +190,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); return rc; } @@ -193,9 +225,10 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; - int i, rc, mapped_cnt = 0; + int i, rc, pgt_residency, mapped_cnt = 0; bool is_dram_addr; + if (!hdev->mmu_enable) return 0; @@ -208,6 +241,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, else mmu_prop = &prop->pmmu; + pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; + /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and map them separately. @@ -231,8 +266,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, real_phys_addr = phys_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr, - real_page_size, is_dram_addr); + rc = hdev->mmu_func[pgt_residency].map(ctx, + real_virt_addr, real_phys_addr, + real_page_size, is_dram_addr); if (rc) goto err; @@ -242,21 +278,22 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); return 0; err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr)) + if (hdev->mmu_func[pgt_residency].unmap(ctx, + real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); real_virt_addr += real_page_size; } - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); return rc; } @@ -271,8 +308,14 @@ void hl_mmu_swap_out(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.swap_out(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL) + hdev->mmu_func[MMU_DR_PGT].swap_out(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL) + hdev->mmu_func[MMU_HR_PGT].swap_out(ctx); } /* @@ -285,8 +328,14 @@ void hl_mmu_swap_in(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.swap_in(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL) + hdev->mmu_func[MMU_DR_PGT].swap_in(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL) + hdev->mmu_func[MMU_HR_PGT].swap_in(ctx); } int hl_mmu_if_set_funcs(struct hl_device *hdev) @@ -297,7 +346,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: case ASIC_GAUDI: - hl_mmu_v1_set_funcs(hdev); + hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 8d1eb5265419..ec7e8a3c37b8 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -29,7 +29,7 @@ static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); hash_del(&pgt_info->node); kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); @@ -54,7 +54,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) if (!pgt_info) return ULLONG_MAX; - phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool, + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_hop_table_size); if (!phys_addr) { dev_err(hdev->dev, "failed to allocate page\n"); @@ -75,7 +75,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) return shadow_addr; shadow_err: - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); pool_add_err: kfree(pgt_info); @@ -91,7 +91,7 @@ static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) static inline u64 get_hop0_addr(struct hl_ctx *ctx) { - return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 + + return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } @@ -419,15 +419,15 @@ static int hl_mmu_v1_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; - hdev->mmu_priv.mmu_pgt_pool = + hdev->mmu_priv.dr.mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - if (!hdev->mmu_priv.mmu_pgt_pool) { + if (!hdev->mmu_priv.dr.mmu_pgt_pool) { dev_err(hdev->dev, "Failed to create page gen pool\n"); return -ENOMEM; } - rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr + + rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + prop->mmu_hop0_tables_total_size, prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, -1); @@ -436,10 +436,10 @@ static int hl_mmu_v1_init(struct hl_device *hdev) goto err_pool_add; } - hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, + hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, prop->mmu_hop_table_size, GFP_KERNEL | __GFP_ZERO); - if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) { + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { rc = -ENOMEM; goto err_pool_add; } @@ -449,7 +449,7 @@ static int hl_mmu_v1_init(struct hl_device *hdev) return 0; err_pool_add: - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); return rc; } @@ -468,8 +468,8 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - kvfree(hdev->mmu_priv.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); } /** @@ -847,10 +847,8 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) * * @hdev: pointer to the device structure */ -void hl_mmu_v1_set_funcs(struct hl_device *hdev) +void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) { - struct hl_mmu_funcs *mmu = &hdev->mmu_func; - mmu->init = hl_mmu_v1_init; mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init;