From c4f18703629dd0112641d6974eb295a53c4a4615 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:12 -0700 Subject: [PATCH] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this xe_gt_tlb_invalidation_range accepts a start and end address rather than a VMA. This will enable multiple VMAs to be invalidated in a single invalidation. Update the PT layer to use this new function. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 157 ++++++++++++-------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 + drivers/gpu/drm/xe/xe_pt.c | 25 +++- 3 files changed, 114 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 8e9c4b990fbb..d0ee1e0df0bd 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -262,6 +262,96 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/** + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an + * address range + * + * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion, can be NULL + * @start: start address + * @end: end address + * @asid: address space id + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. + */ +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid) +{ + struct xe_device *xe = gt_to_xe(gt); +#define MAX_TLB_INVALIDATION_LEN 7 + u32 action[MAX_TLB_INVALIDATION_LEN]; + int len = 0; + + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ + if (!xe->info.has_range_tlb_invalidation) { + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 orig_start = start; + u64 length = end - start; + u64 align, end; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. + */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(orig_start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(start, length)); + + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); + + return send_tlb_invalidation(>->uc.guc, fence, action, len); +} + /** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile @@ -281,72 +371,11 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma) { - struct xe_device *xe = gt_to_xe(gt); -#define MAX_TLB_INVALIDATION_LEN 7 - u32 action[MAX_TLB_INVALIDATION_LEN]; - int len = 0; - xe_gt_assert(gt, vma); - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(fence); - - return 0; - } - - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); - } else { - u64 start = xe_vma_start(vma); - u64 length = xe_vma_size(vma); - u64 align, end; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. - */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(xe_vma_start(vma), align); - end = ALIGN(xe_vma_end(vma), align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(xe_vma_start(vma), length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(xe_vma_start(vma), length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); - - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = xe_vma_vm(vma)->usm.asid; - action[len++] = lower_32_bits(start); - action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); - } - - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - - return send_tlb_invalidation(>->uc.guc, fence, action, len); + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index fbb743d80d2c..bf3bebd9f985 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 5b7930f46cf3..8d3765d3351e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; - struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; + u64 start; + u64 end; + u32 asid; }; static const char * @@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, + ifence->end, ifence->asid); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, struct dma_fence *fence, - struct xe_vma *vma) + u64 start, u64 end, u32 asid) { int ret; @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; - ifence->vma = vma; + ifence->start = start; + ifence->end = end; + ifence->asid = asid; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue /* TLB invalidation must be done before signaling rebind */ if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, ifence, fence, - vma); + int err = invalidation_fence_init(tile->primary_gt, + ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); @@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu dma_fence_wait(fence, false); /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); + err = invalidation_fence_init(tile->primary_gt, ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence);