mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-04 00:15:49 -04:00
drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
xe_gt_tlb_invalidation_range accepts a start and end address rather than a VMA. This will enable multiple VMAs to be invalidated in a single invalidation. Update the PT layer to use this new function. Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Oak Zeng <oak.zeng@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com
This commit is contained in:
@@ -262,6 +262,96 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
|
||||||
|
* address range
|
||||||
|
*
|
||||||
|
* @gt: graphics tile
|
||||||
|
* @fence: invalidation fence which will be signal on TLB invalidation
|
||||||
|
* completion, can be NULL
|
||||||
|
* @start: start address
|
||||||
|
* @end: end address
|
||||||
|
* @asid: address space id
|
||||||
|
*
|
||||||
|
* Issue a range based TLB invalidation if supported, if not fallback to a full
|
||||||
|
* TLB invalidation. Completion of TLB is asynchronous and caller can either use
|
||||||
|
* the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
|
||||||
|
* completion.
|
||||||
|
*
|
||||||
|
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
|
||||||
|
* negative error code on error.
|
||||||
|
*/
|
||||||
|
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
|
||||||
|
struct xe_gt_tlb_invalidation_fence *fence,
|
||||||
|
u64 start, u64 end, u32 asid)
|
||||||
|
{
|
||||||
|
struct xe_device *xe = gt_to_xe(gt);
|
||||||
|
#define MAX_TLB_INVALIDATION_LEN 7
|
||||||
|
u32 action[MAX_TLB_INVALIDATION_LEN];
|
||||||
|
int len = 0;
|
||||||
|
|
||||||
|
/* Execlists not supported */
|
||||||
|
if (gt_to_xe(gt)->info.force_execlist) {
|
||||||
|
if (fence)
|
||||||
|
__invalidation_fence_signal(fence);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
|
||||||
|
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
|
||||||
|
if (!xe->info.has_range_tlb_invalidation) {
|
||||||
|
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
|
||||||
|
} else {
|
||||||
|
u64 orig_start = start;
|
||||||
|
u64 length = end - start;
|
||||||
|
u64 align, end;
|
||||||
|
|
||||||
|
if (length < SZ_4K)
|
||||||
|
length = SZ_4K;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to invalidate a higher granularity if start address
|
||||||
|
* is not aligned to length. When start is not aligned with
|
||||||
|
* length we need to find the length large enough to create an
|
||||||
|
* address mask covering the required range.
|
||||||
|
*/
|
||||||
|
align = roundup_pow_of_two(length);
|
||||||
|
start = ALIGN_DOWN(start, align);
|
||||||
|
end = ALIGN(end, align);
|
||||||
|
length = align;
|
||||||
|
while (start + length < end) {
|
||||||
|
length <<= 1;
|
||||||
|
start = ALIGN_DOWN(orig_start, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minimum invalidation size for a 2MB page that the hardware
|
||||||
|
* expects is 16MB
|
||||||
|
*/
|
||||||
|
if (length >= SZ_2M) {
|
||||||
|
length = max_t(u64, SZ_16M, length);
|
||||||
|
start = ALIGN_DOWN(orig_start, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
xe_gt_assert(gt, length >= SZ_4K);
|
||||||
|
xe_gt_assert(gt, is_power_of_2(length));
|
||||||
|
xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
|
||||||
|
ilog2(SZ_2M) + 1)));
|
||||||
|
xe_gt_assert(gt, IS_ALIGNED(start, length));
|
||||||
|
|
||||||
|
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
|
||||||
|
action[len++] = asid;
|
||||||
|
action[len++] = lower_32_bits(start);
|
||||||
|
action[len++] = upper_32_bits(start);
|
||||||
|
action[len++] = ilog2(length) - ilog2(SZ_4K);
|
||||||
|
}
|
||||||
|
|
||||||
|
xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
|
||||||
|
|
||||||
|
return send_tlb_invalidation(>->uc.guc, fence, action, len);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
|
* xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
|
||||||
* @gt: graphics tile
|
* @gt: graphics tile
|
||||||
@@ -281,72 +371,11 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
|
|||||||
struct xe_gt_tlb_invalidation_fence *fence,
|
struct xe_gt_tlb_invalidation_fence *fence,
|
||||||
struct xe_vma *vma)
|
struct xe_vma *vma)
|
||||||
{
|
{
|
||||||
struct xe_device *xe = gt_to_xe(gt);
|
|
||||||
#define MAX_TLB_INVALIDATION_LEN 7
|
|
||||||
u32 action[MAX_TLB_INVALIDATION_LEN];
|
|
||||||
int len = 0;
|
|
||||||
|
|
||||||
xe_gt_assert(gt, vma);
|
xe_gt_assert(gt, vma);
|
||||||
|
|
||||||
/* Execlists not supported */
|
return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
|
||||||
if (gt_to_xe(gt)->info.force_execlist) {
|
xe_vma_end(vma),
|
||||||
if (fence)
|
xe_vma_vm(vma)->usm.asid);
|
||||||
__invalidation_fence_signal(fence);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
|
|
||||||
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
|
|
||||||
if (!xe->info.has_range_tlb_invalidation) {
|
|
||||||
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
|
|
||||||
} else {
|
|
||||||
u64 start = xe_vma_start(vma);
|
|
||||||
u64 length = xe_vma_size(vma);
|
|
||||||
u64 align, end;
|
|
||||||
|
|
||||||
if (length < SZ_4K)
|
|
||||||
length = SZ_4K;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to invalidate a higher granularity if start address
|
|
||||||
* is not aligned to length. When start is not aligned with
|
|
||||||
* length we need to find the length large enough to create an
|
|
||||||
* address mask covering the required range.
|
|
||||||
*/
|
|
||||||
align = roundup_pow_of_two(length);
|
|
||||||
start = ALIGN_DOWN(xe_vma_start(vma), align);
|
|
||||||
end = ALIGN(xe_vma_end(vma), align);
|
|
||||||
length = align;
|
|
||||||
while (start + length < end) {
|
|
||||||
length <<= 1;
|
|
||||||
start = ALIGN_DOWN(xe_vma_start(vma), length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Minimum invalidation size for a 2MB page that the hardware
|
|
||||||
* expects is 16MB
|
|
||||||
*/
|
|
||||||
if (length >= SZ_2M) {
|
|
||||||
length = max_t(u64, SZ_16M, length);
|
|
||||||
start = ALIGN_DOWN(xe_vma_start(vma), length);
|
|
||||||
}
|
|
||||||
|
|
||||||
xe_gt_assert(gt, length >= SZ_4K);
|
|
||||||
xe_gt_assert(gt, is_power_of_2(length));
|
|
||||||
xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
|
|
||||||
xe_gt_assert(gt, IS_ALIGNED(start, length));
|
|
||||||
|
|
||||||
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
|
|
||||||
action[len++] = xe_vma_vm(vma)->usm.asid;
|
|
||||||
action[len++] = lower_32_bits(start);
|
|
||||||
action[len++] = upper_32_bits(start);
|
|
||||||
action[len++] = ilog2(length) - ilog2(SZ_4K);
|
|
||||||
}
|
|
||||||
|
|
||||||
xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
|
|
||||||
|
|
||||||
return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
|
|||||||
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
|
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
|
||||||
struct xe_gt_tlb_invalidation_fence *fence,
|
struct xe_gt_tlb_invalidation_fence *fence,
|
||||||
struct xe_vma *vma);
|
struct xe_vma *vma);
|
||||||
|
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
|
||||||
|
struct xe_gt_tlb_invalidation_fence *fence,
|
||||||
|
u64 start, u64 end, u32 asid);
|
||||||
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
|
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
|
||||||
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
|
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
|
||||||
|
|
||||||
|
|||||||
@@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
|
|||||||
struct invalidation_fence {
|
struct invalidation_fence {
|
||||||
struct xe_gt_tlb_invalidation_fence base;
|
struct xe_gt_tlb_invalidation_fence base;
|
||||||
struct xe_gt *gt;
|
struct xe_gt *gt;
|
||||||
struct xe_vma *vma;
|
|
||||||
struct dma_fence *fence;
|
struct dma_fence *fence;
|
||||||
struct dma_fence_cb cb;
|
struct dma_fence_cb cb;
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
|
u64 start;
|
||||||
|
u64 end;
|
||||||
|
u32 asid;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
@@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
|
|||||||
container_of(w, struct invalidation_fence, work);
|
container_of(w, struct invalidation_fence, work);
|
||||||
|
|
||||||
trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
|
trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
|
||||||
xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
|
xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
|
||||||
|
ifence->end, ifence->asid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int invalidation_fence_init(struct xe_gt *gt,
|
static int invalidation_fence_init(struct xe_gt *gt,
|
||||||
struct invalidation_fence *ifence,
|
struct invalidation_fence *ifence,
|
||||||
struct dma_fence *fence,
|
struct dma_fence *fence,
|
||||||
struct xe_vma *vma)
|
u64 start, u64 end, u32 asid)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
|
|||||||
dma_fence_get(&ifence->base.base); /* Ref for caller */
|
dma_fence_get(&ifence->base.base); /* Ref for caller */
|
||||||
ifence->fence = fence;
|
ifence->fence = fence;
|
||||||
ifence->gt = gt;
|
ifence->gt = gt;
|
||||||
ifence->vma = vma;
|
ifence->start = start;
|
||||||
|
ifence->end = end;
|
||||||
|
ifence->asid = asid;
|
||||||
|
|
||||||
INIT_WORK(&ifence->work, invalidation_fence_work_func);
|
INIT_WORK(&ifence->work, invalidation_fence_work_func);
|
||||||
ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
|
ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
|
||||||
@@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
|
|||||||
|
|
||||||
/* TLB invalidation must be done before signaling rebind */
|
/* TLB invalidation must be done before signaling rebind */
|
||||||
if (ifence) {
|
if (ifence) {
|
||||||
int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
|
int err = invalidation_fence_init(tile->primary_gt,
|
||||||
vma);
|
ifence, fence,
|
||||||
|
xe_vma_start(vma),
|
||||||
|
xe_vma_end(vma),
|
||||||
|
xe_vma_vm(vma)->usm.asid);
|
||||||
if (err) {
|
if (err) {
|
||||||
dma_fence_put(fence);
|
dma_fence_put(fence);
|
||||||
kfree(ifence);
|
kfree(ifence);
|
||||||
@@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
|
|||||||
dma_fence_wait(fence, false);
|
dma_fence_wait(fence, false);
|
||||||
|
|
||||||
/* TLB invalidation must be done before signaling unbind */
|
/* TLB invalidation must be done before signaling unbind */
|
||||||
err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
|
err = invalidation_fence_init(tile->primary_gt, ifence, fence,
|
||||||
|
xe_vma_start(vma),
|
||||||
|
xe_vma_end(vma),
|
||||||
|
xe_vma_vm(vma)->usm.asid);
|
||||||
if (err) {
|
if (err) {
|
||||||
dma_fence_put(fence);
|
dma_fence_put(fence);
|
||||||
kfree(ifence);
|
kfree(ifence);
|
||||||
|
|||||||
Reference in New Issue
Block a user