diff --git a/.mailmap b/.mailmap index 84309a39d329..44cea28596e7 100644 --- a/.mailmap +++ b/.mailmap @@ -481,6 +481,7 @@ Lorenzo Pieralisi Lorenzo Stoakes Luca Ceresoli Luca Weiss +Lucas De Marchi Lukasz Luba Luo Jie Lance Yang diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov index 2fd7e9b7bacc..7f5ef9eada53 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov @@ -119,7 +119,7 @@ Description: The GT preemption timeout (PT) in [us] to be applied to all functions. See sriov_admin/{pf,vf}/profile/preempt_timeout_us for more details. - sched_priority: (RW/RO) string + sched_priority: (WO) string The GT scheduling priority to be applied for all functions. See sriov_admin/{pf,vf}/profile/sched_priority for more details. diff --git a/Documentation/gpu/xe/xe_exec_queue.rst b/Documentation/gpu/xe/xe_exec_queue.rst index 6076569e311c..8707806211c9 100644 --- a/Documentation/gpu/xe/xe_exec_queue.rst +++ b/Documentation/gpu/xe/xe_exec_queue.rst @@ -7,6 +7,20 @@ Execution Queue .. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c :doc: Execution Queue +Multi Queue Group +================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c + :doc: Multi Queue Group + +.. _multi-queue-group-guc-interface: + +Multi Queue Group GuC interface +=============================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_submit.c + :doc: Multi Queue Group GuC interface + Internal API ============ diff --git a/MAINTAINERS b/MAINTAINERS index bdbe32ddcedb..b8a5569606d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12640,7 +12640,7 @@ F: include/drm/intel/ F: include/uapi/drm/i915_drm.h INTEL DRM XE DRIVER (Lunar Lake and newer) -M: Lucas De Marchi +M: Matthew Brost M: Thomas Hellström M: Rodrigo Vivi L: intel-xe@lists.freedesktop.org diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 73e550c8ff8c..39c8c50401dd 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1288,6 +1288,9 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, DMA_BIDIRECTIONAL; retry: + if (time_after(jiffies, timeout)) + return -EBUSY; + hmm_range.notifier_seq = mmu_interval_read_begin(notifier); if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) goto set_seqno; diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 35f71dbd1bac..7f08b4cd91d6 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_oa.o \ xe_observation.o \ xe_pagefault.o \ + xe_page_reclaim.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ @@ -173,6 +174,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ + xe_mert.o \ xe_pci_sriov.o \ xe_sriov_packet.o \ xe_sriov_pf.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 47756e4674a1..83a6e7794982 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -139,6 +139,10 @@ enum xe_guc_action { XE_GUC_ACTION_DEREGISTER_G2G = 0x4508, XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, + XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602, + XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603, + XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604, + XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR = 0x4605, XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, 
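For illustration, the retry bound added to drm_gpusvm_get_pages() above turns the hmm_range retry loop into a time-limited one: once `timeout` has passed, the function returns -EBUSY instead of looping on notifier invalidations indefinitely. A minimal sketch of that pattern, assuming `timeout` is derived from a millisecond budget before the first attempt (the helper name, the callback and the 2-second budget are illustrative only, not part of this patch):

#include <linux/jiffies.h>
#include <linux/types.h>

/*
 * Illustrative sketch only: the real drm_gpusvm_get_pages() takes the
 * gpusvm/pages/ctx arguments and faults pages via hmm_range_fault().
 */
static int get_pages_bounded(bool (*try_once)(void))
{
	/* assumed budget; the driver picks the actual value */
	unsigned long timeout = jiffies + msecs_to_jiffies(2000);

retry:
	if (time_after(jiffies, timeout))
		return -EBUSY;	/* stop retrying once the time budget is spent */

	if (!try_once())	/* e.g. notifier sequence changed under us */
		goto retry;

	return 0;
}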
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, @@ -151,6 +155,8 @@ enum xe_guc_action { XE_GUC_ACTION_TLB_INVALIDATION = 0x7000, XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001, XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002, + XE_GUC_ACTION_PAGE_RECLAMATION = 0x7003, + XE_GUC_ACTION_PAGE_RECLAMATION_DONE = 0x7004, XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 0b28659d94e9..d9f21202e1a9 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -502,13 +502,17 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /** - * DOC: VF2GUC_NOTIFY_RESFIX_DONE + * DOC: VF2GUC_RESFIX_DONE * - * This action is used by VF to notify the GuC that the VF KMD has completed - * post-migration recovery steps. + * This action is used by VF to inform the GuC that the VF KMD has completed + * post-migration recovery steps. From GuC VF compatibility 1.27.0 onwards, it + * shall only be sent after posting RESFIX_START and that both @MARKER fields + * must match. * * This message must be sent as `MMIO HXG Message`_. * + * Updated since GuC VF compatibility 1.27.0. + * * +---+-------+--------------------------------------------------------------+ * | | Bits | Description | * +===+=======+==============================================================+ @@ -516,9 +520,11 @@ * | +-------+--------------------------------------------------------------+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | * | +-------+--------------------------------------------------------------+ - * | | 27:16 | DATA0 = MBZ | + * | | 27:16 | DATA0 = MARKER = MBZ (only prior 1.27.0) | * | +-------+--------------------------------------------------------------+ - * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | + * | | 27:16 | DATA0 = MARKER - can't be zero (1.27.0+) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_RESFIX_DONE` = 0x5508 | * +---+-------+--------------------------------------------------------------+ * * +---+-------+--------------------------------------------------------------+ @@ -531,13 +537,13 @@ * | | 27:0 | DATA0 = MBZ | * +---+-------+--------------------------------------------------------------+ */ -#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u +#define GUC_ACTION_VF2GUC_RESFIX_DONE 0x5508u -#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN -#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_RESFIX_DONE_REQUEST_MSG_0_MARKER GUC_HXG_REQUEST_MSG_0_DATA0 -#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN -#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2GUC_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /** * DOC: VF2GUC_QUERY_SINGLE_KLV @@ -656,4 +662,45 @@ #define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN #define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0 +/** + * DOC: VF2GUC_RESFIX_START + * + * This action is used 
by VF to inform the GuC that the VF KMD will be starting + * post-migration recovery fixups. The @MARKER sent with this action must match + * with the MARKER posted in the VF2GUC_RESFIX_DONE message. + * + * This message must be sent as `MMIO HXG Message`_. + * + * Available since GuC VF compatibility 1.27.0. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MARKER - can't be zero | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_RESFIX_START` = 0x550F | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_RESFIX_START 0x550Fu + +#define VF2GUC_RESFIX_START_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_RESFIX_START_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_RESFIX_START_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 265a135e7061..89a4f8c504e6 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -352,6 +352,12 @@ enum { * :1: NORMAL = schedule VF always, irrespective of whether it has work or not * :2: HIGH = schedule VF in the next time-slice after current active * time-slice completes if it has active work + * + * _`GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT` : 0x8A0D + * Given that multi-LRC contexts are incompatible with SRIOV scheduler + * groups and cause the latter to be turned off when registered with the + * GuC, this config allows the PF to set a threshold for multi-LRC context + * registrations by VFs to monitor their behavior. */ #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 @@ -410,6 +416,9 @@ enum { #define GUC_SCHED_PRIORITY_NORMAL 1u #define GUC_SCHED_PRIORITY_HIGH 2u +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY 0x8a0d +#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN 1u + /* * Workaround keys: */ diff --git a/drivers/gpu/drm/xe/abi/guc_lfd_abi.h b/drivers/gpu/drm/xe/abi/guc_lfd_abi.h new file mode 100644 index 000000000000..b6ed20d5b508 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_lfd_abi.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _ABI_GUC_LFD_ABI_H_ +#define _ABI_GUC_LFD_ABI_H_ + +#include + +#include "guc_lic_abi.h" + +/* The current major version of GuC-Log-File format. 
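For reference, a VF driver would typically pack the single-dword RESFIX_START request with the standard HXG field macros. A sketch under the assumption that the usual GUC_HXG_* helpers from abi/guc_messages_abi.h are available and that `marker` is the same non-zero value later repeated in VF2GUC_RESFIX_DONE (the helper name is illustrative; the MMIO send path is omitted):

#include <linux/bitfield.h>
/* GUC_HXG_* field macros come from abi/guc_messages_abi.h */

/* marker must be non-zero and fit in the 12-bit DATA0 field */
static u32 build_resfix_start_msg(u32 marker)
{
	return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
	       FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
	       FIELD_PREP(VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER, marker) |
	       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			  GUC_ACTION_VF2GUC_RESFIX_START);
}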
*/ +#define GUC_LFD_FORMAT_VERSION_MAJOR 0x0001 +/* The current minor version of GuC-Log-File format. */ +#define GUC_LFD_FORMAT_VERSION_MINOR 0x0000 + +/** enum guc_lfd_type - Log format descriptor type */ +enum guc_lfd_type { + /** + * @GUC_LFD_TYPE_FW_REQUIRED_RANGE_START: Start of range for + * required LFDs from GuC + * @GUC_LFD_TYPE_FW_VERSION: GuC Firmware Version structure. + * @GUC_LFD_TYPE_GUC_DEVICE_ID: GuC microcontroller device ID. + * @GUC_LFD_TYPE_TSC_FREQUENCY: Frequency of GuC timestamps. + * @GUC_LFD_TYPE_GMD_ID: HW GMD ID. + * @GUC_LFD_TYPE_BUILD_PLATFORM_ID: GuC build platform ID. + * @GUC_LFD_TYPE_FW_REQUIRED_RANGE_END: End of range for + * required LFDs from GuC + */ + GUC_LFD_TYPE_FW_REQUIRED_RANGE_START = 0x1, + GUC_LFD_TYPE_FW_VERSION = 0x1, + GUC_LFD_TYPE_GUC_DEVICE_ID = 0x2, + GUC_LFD_TYPE_TSC_FREQUENCY = 0x3, + GUC_LFD_TYPE_GMD_ID = 0x4, + GUC_LFD_TYPE_BUILD_PLATFORM_ID = 0x5, + GUC_LFD_TYPE_FW_REQUIRED_RANGE_END = 0x1FFF, + + /** + * @GUC_LFD_TYPE_FW_OPTIONAL_RANGE_START: Start of range for + * optional LFDs from GuC + * @GUC_LFD_TYPE_LOG_EVENTS_BUFFER: Log-event-entries buffer. + * @GUC_LFD_TYPE_FW_CRASH_DUMP: GuC generated crash-dump blob. + * @GUC_LFD_TYPE_FW_OPTIONAL_RANGE_END: End of range for + * optional LFDs from GuC + */ + GUC_LFD_TYPE_FW_OPTIONAL_RANGE_START = 0x2000, + GUC_LFD_TYPE_LOG_EVENTS_BUFFER = 0x2000, + GUC_LFD_TYPE_FW_CRASH_DUMP = 0x2001, + GUC_LFD_TYPE_FW_OPTIONAL_RANGE_END = 0x3FFF, + + /** + * @GUC_LFD_TYPE_KMD_REQUIRED_RANGE_START: Start of range for + * required KMD LFDs + * @GUC_LFD_TYPE_OS_ID: An identifier for the OS. + * @GUC_LFD_TYPE_KMD_REQUIRED_RANGE_END: End of this range for + * required KMD LFDs + */ + GUC_LFD_TYPE_KMD_REQUIRED_RANGE_START = 0x4000, + GUC_LFD_TYPE_OS_ID = 0x4000, + GUC_LFD_TYPE_KMD_REQUIRED_RANGE_END = 0x5FFF, + + /** + * @GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_START: Start of range for + * optional KMD LFDs + * @GUC_LFD_TYPE_BINARY_SCHEMA_FORMAT: Binary representation of + * GuC log-events schema. + * @GUC_LFD_TYPE_HOST_COMMENT: ASCII string containing comments + * from the host/KMD. + * @GUC_LFD_TYPE_TIMESTAMP_ANCHOR: A timestamp anchor, to convert + * between host and GuC timestamp. + * @GUC_LFD_TYPE_TIMESTAMP_ANCHOR_CONFIG: Timestamp anchor + * configuration, definition of timestamp frequency and bit width. + * @GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_END: End of this range for + * optional KMD LFDs + */ + GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_START = 0x6000, + GUC_LFD_TYPE_BINARY_SCHEMA_FORMAT = 0x6000, + GUC_LFD_TYPE_HOST_COMMENT = 0x6001, + GUC_LFD_TYPE_TIMESTAMP_ANCHOR = 0x6002, + GUC_LFD_TYPE_TIMESTAMP_ANCHOR_CONFIG = 0x6003, + GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_END = 0x7FFF, + + /* + * @GUC_LFD_TYPE_RESERVED_RANGE_START: Start of reserved range + * @GUC_LFD_TYPE_RESERVED_RANGE_END: End of reserved range + */ + GUC_LFD_TYPE_RESERVED_RANGE_START = 0x8000, + GUC_LFD_TYPE_RESERVED_RANGE_END = 0xFFFF, +}; + +/** enum guc_lfd_os_type - OS Type LFD-ID */ +enum guc_lfd_os_type { + /** @GUC_LFD_OS_TYPE_OSID_WIN: Windows OS */ + GUC_LFD_OS_TYPE_OSID_WIN = 0x1, + /** @GUC_LFD_OS_TYPE_OSID_LIN: Linux OS */ + GUC_LFD_OS_TYPE_OSID_LIN = 0x2, + /** @GUC_LFD_OS_TYPE_OSID_VMW: VMWare OS */ + GUC_LFD_OS_TYPE_OSID_VMW = 0x3, + /** @GUC_LFD_OS_TYPE_OSID_OTHER: Other */ + GUC_LFD_OS_TYPE_OSID_OTHER = 0x4, +}; + +/** struct guc_lfd_data - A generic header structure for all LFD blocks */ +struct guc_lfd_data { + /** @header: A 32 bits dword, contains multiple bit fields */ + u32 header; + /* LFD type. 
See guc_lfd_type */ +#define GUC_LFD_DATA_HEADER_MASK_TYPE GENMASK(31, 16) +#define GUC_LFD_DATA_HEADER_MASK_MAGIC GENMASK(15, 0) + + /** @data_count: Number of dwords the `data` field contains. */ + u32 data_count; + /** @data: Data defined by GUC_LFD_DATA_HEADER_MASK_TYPE */ + u32 data[] __counted_by(data_count); +} __packed; + +/** + * struct guc_lfd_data_log_events_buf - GuC Log Events Buffer. + * This is optional fw LFD data + */ +struct guc_lfd_data_log_events_buf { + /** + * @log_events_format_version: version of GuC log format of buffer + */ + u32 log_events_format_version; + /** + * @log_event: The log event data. + * Size in dwords is LFD block size - 1. + */ + u32 log_event[]; +} __packed; + +/** struct guc_lfd_data_os_info - OS Version Information. */ +struct guc_lfd_data_os_info { + /** + * @os_id: enum values to identify the OS brand. + * See guc_lfd_os_type for the range of types + */ + u32 os_id; + /** + * @build_version: ASCII string containing OS build version + * information based on os_id. String is padded with null + * characters to ensure its DWORD aligned. + * Size in dwords is LFD block size - 1. + */ + char build_version[]; +} __packed; + +/** + * struct guc_logfile_header - Header of GuC Log Streaming-LFD-File Format. + * This structure encapsulates the layout of the guc-log-file format + */ +struct guc_lfd_file_header { + /** + * @magic: A magic number set by producer of a GuC log file to + * identify that file is a valid guc-log-file containing a stream + * of LFDs. + */ + u64 magic; + /** @version: Version of this file format layout */ + u32 version; +#define GUC_LFD_FILE_HEADER_VERSION_MASK_MAJOR GENMASK(31, 16) +#define GUC_LFD_FILE_HEADER_VERSION_MASK_MINOR GENMASK(15, 0) + + /** @stream: A stream of one or more guc_lfd_data LFD blocks + */ + u32 stream[]; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_lic_abi.h b/drivers/gpu/drm/xe/abi/guc_lic_abi.h new file mode 100644 index 000000000000..9169644093a2 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_lic_abi.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _ABI_GUC_LIC_ABI_H_ +#define _ABI_GUC_LIC_ABI_H_ + +#include + +/** + * enum guc_lic_type - Log Init Config KLV IDs. + */ +enum guc_lic_type { + /** + * @GUC_LIC_TYPE_GUC_SW_VERSION: GuC firmware version. Value + * is a 32 bit number represented by guc_sw_version. + */ + GUC_LIC_TYPE_GUC_SW_VERSION = 0x1, + /** + * @GUC_LIC_TYPE_GUC_DEVICE_ID: GuC device id. Value is a 32 + * bit. + */ + GUC_LIC_TYPE_GUC_DEVICE_ID = 0x2, + /** + * @GUC_LIC_TYPE_TSC_FREQUENCY: GuC timestamp counter + * frequency. Value is a 32 bit number representing frequency in + * kHz. This timestamp is utilized in log entries, timer and + * for engine utilization tracking. + */ + GUC_LIC_TYPE_TSC_FREQUENCY = 0x3, + /** + * @GUC_LIC_TYPE_GMD_ID: HW GMD ID. Value is a 32 bit number + * representing graphics, media and display HW architecture IDs. + */ + GUC_LIC_TYPE_GMD_ID = 0x4, + /** + * @GUC_LIC_TYPE_BUILD_PLATFORM_ID: GuC build platform ID. + * Value is 32 bits. + */ + GUC_LIC_TYPE_BUILD_PLATFORM_ID = 0x5, +}; + +/** + * struct guc_lic - GuC LIC (Log-Init-Config) structure. + * + * This is populated by the GUC at log init time and is located in the log + * buffer memory allocation. + */ +struct guc_lic { + /** + * @magic: A magic number set by GuC to identify that this + * structure contains valid information: magic = GUC_LIC_MAGIC. 
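Because the file format is just a header followed by a stream of variable-length guc_lfd_data blocks, a consumer can walk the stream with nothing more than the TYPE mask and the per-block data_count. A hypothetical sketch (the walker name and bounds handling are illustrative, not part of this patch):

#include <linux/bitfield.h>

/* Hypothetical walker over the guc_lfd_file_header::stream[] payload. */
static void walk_lfd_stream(const u32 *stream, u32 ndwords)
{
	u32 pos = 0;

	while (pos + 2 <= ndwords) {
		const struct guc_lfd_data *lfd = (const void *)&stream[pos];
		u32 type = FIELD_GET(GUC_LFD_DATA_HEADER_MASK_TYPE, lfd->header);

		if (pos + 2 + lfd->data_count > ndwords)
			break;	/* truncated block */

		/* dispatch on type, e.g. GUC_LFD_TYPE_FW_VERSION ... */
		(void)type;

		pos += 2 + lfd->data_count;	/* header + payload dwords */
	}
}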
+ */ + u32 magic; +#define GUC_LIC_MAGIC 0x8086900D + /** + * @version: The version of the this structure. + * Major and minor version number are represented as bit fields. + */ + u32 version; +#define GUC_LIC_VERSION_MASK_MAJOR GENMASK(31, 16) +#define GUC_LIC_VERSION_MASK_MINOR GENMASK(15, 0) + +#define GUC_LIC_VERSION_MAJOR 1u +#define GUC_LIC_VERSION_MINOR 0u + + /** @data_count: Number of dwords the `data` array contains. */ + u32 data_count; + /** + * @data: Array of dwords representing a list of LIC KLVs of + * type guc_klv_generic with keys represented by guc_lic_type + */ + u32 data[] __counted_by(data_count); +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_log_abi.h b/drivers/gpu/drm/xe/abi/guc_log_abi.h index 554630b7ccd9..fbf212d59a40 100644 --- a/drivers/gpu/drm/xe/abi/guc_log_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_log_abi.h @@ -8,11 +8,45 @@ #include +/** + * DOC: GuC Log buffer Layout + * + * The in-memory log buffer layout is as follows:: + * + * +===============================+ 0000h + * | Crash dump state header | ^ + * +-------------------------------+ 32B | + * | Debug state header | | + * +-------------------------------+ 64B 4KB + * | Capture state header | | + * +-------------------------------+ 96B | + * | | v + * +===============================+ <--- EVENT_DATA_OFFSET + * | Event logs(raw data) | ^ + * | | | + * | | EVENT_DATA_BUFFER_SIZE + * | | | + * | | v + * +===============================+ <--- CRASH_DUMP_OFFSET + * | Crash Dump(raw data) | ^ + * | | | + * | | CRASH_DUMP_BUFFER_SIZE + * | | | + * | | v + * +===============================+ <--- STATE_CAPTURE_OFFSET + * | Error state capture(raw data) | ^ + * | | | + * | | STATE_CAPTURE_BUFFER_SIZE + * | | | + * | | v + * +===============================+ Total: GUC_LOG_SIZE + */ + /* GuC logging buffer types */ -enum guc_log_buffer_type { - GUC_LOG_BUFFER_CRASH_DUMP, - GUC_LOG_BUFFER_DEBUG, - GUC_LOG_BUFFER_CAPTURE, +enum guc_log_type { + GUC_LOG_TYPE_EVENT_DATA, + GUC_LOG_TYPE_CRASH_DUMP, + GUC_LOG_TYPE_STATE_CAPTURE, }; #define GUC_LOG_BUFFER_TYPE_MAX 3 diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 1fd4a815e784..6a935a75f2a4 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -210,10 +210,11 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* TODO: Consider sharing framebuffer mapping? 
* embed i915_vma inside intel_framebuffer */ - xe_pm_runtime_get_noresume(xe); - ret = mutex_lock_interruptible(&ggtt->lock); + guard(xe_pm_runtime_noresume)(xe); + ACQUIRE(mutex_intr, lock)(&ggtt->lock); + ret = ACQUIRE_ERR(mutex_intr, &lock); if (ret) - goto out; + return ret; align = XE_PAGE_SIZE; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) @@ -223,15 +224,13 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, vma->node = bo->ggtt_node[tile0->id]; } else if (view->type == I915_GTT_VIEW_NORMAL) { vma->node = xe_ggtt_node_init(ggtt); - if (IS_ERR(vma->node)) { - ret = PTR_ERR(vma->node); - goto out_unlock; - } + if (IS_ERR(vma->node)) + return PTR_ERR(vma->node); ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); - goto out_unlock; + return ret; } xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]); @@ -245,13 +244,13 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, vma->node = xe_ggtt_node_init(ggtt); if (IS_ERR(vma->node)) { ret = PTR_ERR(vma->node); - goto out_unlock; + return ret; } ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); if (ret) { xe_ggtt_node_fini(vma->node); - goto out_unlock; + return ret; } ggtt_ofs = vma->node->base.start; @@ -265,10 +264,6 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, rot_info->plane[i].dst_stride); } -out_unlock: - mutex_unlock(&ggtt->lock); -out: - xe_pm_runtime_put(xe); return ret; } diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 4e5ccd50f69d..07acae121aa7 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -38,8 +38,6 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; struct xe_gsc *gsc = >->uc.gsc; - bool ret = true; - unsigned int fw_ref; if (!gsc || !xe_uc_fw_is_enabled(&gsc->fw)) { drm_dbg_kms(&xe->drm, @@ -47,22 +45,15 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) return false; } - xe_pm_runtime_get(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (!fw_ref) { + guard(xe_pm_runtime)(xe); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref.domains) { drm_dbg_kms(&xe->drm, "failed to get forcewake to check proxy status\n"); - ret = false; - goto out; + return false; } - if (!xe_gsc_proxy_init_done(gsc)) - ret = false; - - xe_force_wake_put(gt_to_fw(gt), fw_ref); -out: - xe_pm_runtime_put(xe); - return ret; + return xe_gsc_proxy_init_done(gsc); } /*This function helps allocate memory for the command that we will send to gsc cs */ @@ -168,17 +159,15 @@ static ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_contex u32 addr_out_off, addr_in_wr_off = 0; int ret, tries = 0; - if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) { - ret = -ENOSPC; - goto out; - } + if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) + return -ENOSPC; msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; addr_out_off = PAGE_SIZE; host_session_id = xe_gsc_create_host_session_id(); - xe_pm_runtime_get_noresume(xe); + guard(xe_pm_runtime_noresume)(xe); addr_in_wr_off = xe_gsc_emit_header(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, HECI_MEADDRESS_HDCP, host_session_id, msg_in_len); @@ -203,14 +192,12 @@ static ssize_t intel_hdcp_gsc_msg_send(struct 
intel_hdcp_gsc_context *gsc_contex } while (++tries < 20); if (ret) - goto out; + return ret; xe_map_memcpy_from(xe, msg_out, &gsc_context->hdcp_bo->vmap, addr_out_off + HDCP_GSC_HEADER_SIZE, msg_out_len); -out: - xe_pm_runtime_put(xe); return ret; } diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index 5d41ca297447..885fcf211e6d 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -47,6 +47,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_QUEUE_DRAIN_MODE BIT(12) #define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 917a088c28f2..93643da57428 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -227,6 +227,9 @@ #define MIRROR_FUSE1 XE_REG(0x911c) +#define FUSE2 XE_REG(0x9120) +#define PRODUCTION_HW REG_BIT(2) + #define MIRROR_L3BANK_ENABLE XE_REG(0x9130) #define XE3_L3BANK_ENABLE REG_GENMASK(31, 0) diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h index 4389e5a76f89..4d83461e538b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -9,6 +9,7 @@ #define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) #define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) +#define XE_PTE_ADDR_MASK GENMASK_ULL(51, 12) #define GGTT_PTE_VFID GENMASK_ULL(11, 2) #define GUC_GGTT_TOP 0xFEE00000 diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 2118f7dec287..87984713dd12 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -90,6 +90,9 @@ #define GUC_SEND_INTERRUPT XE_REG(0xc4c8) #define GUC_SEND_TRIGGER REG_BIT(0) +#define GUC_INTR_CHICKEN XE_REG(0xc50c) +#define DISABLE_SIGNALING_ENGINES REG_BIT(1) + #define GUC_BCS_RCS_IER XE_REG(0xc550) #define GUC_VCS2_VCS1_IER XE_REG(0xc554) #define GUC_WD_VECS_IER XE_REG(0xc558) diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index 2f97662d958d..9d74f454d3ff 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -20,6 +20,7 @@ #define GU_MISC_IRQ REG_BIT(29) #define ERROR_IRQ(x) REG_BIT(26 + (x)) #define DISPLAY_IRQ REG_BIT(16) +#define SOC_H2DMEMINT_IRQ REG_BIT(13) #define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) diff --git a/drivers/gpu/drm/xe/regs/xe_mert_regs.h b/drivers/gpu/drm/xe/regs/xe_mert_regs.h new file mode 100644 index 000000000000..c345e11ceea8 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_mert_regs.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_MERT_REGS_H_ +#define _XE_MERT_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define MERT_LMEM_CFG XE_REG(0x1448b0) + +#define MERT_TLB_CT_INTR_ERR_ID_PORT XE_REG(0x145190) +#define MERT_TLB_CT_VFID_MASK REG_GENMASK(16, 9) +#define MERT_TLB_CT_ERROR_MASK REG_GENMASK(5, 0) +#define MERT_TLB_CT_LMTT_FAULT 0x05 + +#define MERT_TLB_INV_DESC_A XE_REG(0x14cf7c) +#define MERT_TLB_INV_DESC_A_VALID REG_BIT(0) + +#endif /* _XE_MERT_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index e693a50706f8..04a729e610aa 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ 
b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -100,4 +100,21 @@ #define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00) #define OAM_LAT_MEASURE_ENABLE REG_BIT(4) +/* Actual address is MEDIA_GT_GSI_OFFSET + the base addr below */ +#define XE_OAM_SAG_BASE 0x13000 +#define XE_OAM_SCMI_0_BASE 0x14000 +#define XE_OAM_SCMI_1_BASE 0x14800 +#define XE_OAM_SAG_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SAG_BASE) +#define XE_OAM_SCMI_0_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_0_BASE) +#define XE_OAM_SCMI_1_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_1_BASE) + +#define OAMERT_CONTROL XE_REG(0x1453a0) +#define OAMERT_DEBUG XE_REG(0x1453a4) +#define OAMERT_STATUS XE_REG(0x1453a8) +#define OAMERT_HEAD_POINTER XE_REG(0x1453ac) +#define OAMERT_TAIL_POINTER XE_REG(0x1453b0) +#define OAMERT_BUFFER XE_REG(0x1453b4) +#define OAMERT_CONTEXT_CONTROL XE_REG(0x1453c8) +#define OAMERT_MMIO_TRG XE_REG(0x1453cc) + #endif diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c index f3fb23aa5d2e..2687a1b054dd 100644 --- a/drivers/gpu/drm/xe/tests/xe_args_test.c +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -78,6 +78,24 @@ static void pick_arg_example(struct kunit *test) #undef buz } +static void if_args_example(struct kunit *test) +{ + enum { Z = 1, Q }; + +#define foo X, Y +#define bar IF_ARGS(Z, Q, foo) +#define buz IF_ARGS(Z, Q, DROP_FIRST_ARG(FIRST_ARG(foo))) + + KUNIT_EXPECT_EQ(test, bar, Z); + KUNIT_EXPECT_EQ(test, buz, Q); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Z"); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q"); + +#undef foo +#undef bar +#undef buz +} + static void sep_comma_example(struct kunit *test) { #define foo(f) f(X) f(Y) f(Z) f(Q) @@ -198,6 +216,40 @@ static void last_arg_test(struct kunit *test) KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12"); } +static void if_args_test(struct kunit *test) +{ + bool with_args = true; + bool no_args = false; + enum { X = 100 }; + + KUNIT_EXPECT_TRUE(test, IF_ARGS(true, false, FOO_ARGS)); + KUNIT_EXPECT_FALSE(test, IF_ARGS(true, false, NO_ARGS)); + + KUNIT_EXPECT_TRUE(test, CONCATENATE(IF_ARGS(with, no, FOO_ARGS), _args)); + KUNIT_EXPECT_FALSE(test, CONCATENATE(IF_ARGS(with, no, NO_ARGS), _args)); + + KUNIT_EXPECT_STREQ(test, __stringify(IF_ARGS(yes, no, FOO_ARGS)), "yes"); + KUNIT_EXPECT_STREQ(test, __stringify(IF_ARGS(yes, no, NO_ARGS)), "no"); + + KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, FOO_ARGS), -1, FOO_ARGS), 4); + KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, FOO_ARGS), -1, NO_ARGS), -1); + KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, NO_ARGS), -1, FOO_ARGS), 0); + KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, NO_ARGS), -1, NO_ARGS), -1); + + KUNIT_EXPECT_EQ(test, + CALL_ARGS(FIRST_ARG, + CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, FOO_ARGS), _ARGS)), X); + KUNIT_EXPECT_EQ(test, + CALL_ARGS(FIRST_ARG, + CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, NO_ARGS), _ARGS)), -1); + KUNIT_EXPECT_EQ(test, + CALL_ARGS(COUNT_ARGS, + CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, FOO_ARGS), _ARGS)), 4); + KUNIT_EXPECT_EQ(test, + CALL_ARGS(COUNT_ARGS, + CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, NO_ARGS), _ARGS)), 12); +} + static struct kunit_case args_tests[] = { KUNIT_CASE(count_args_test), KUNIT_CASE(call_args_example), @@ -209,6 +261,8 @@ static struct kunit_case args_tests[] = { KUNIT_CASE(last_arg_example), KUNIT_CASE(last_arg_test), KUNIT_CASE(pick_arg_example), + KUNIT_CASE(if_args_example), + KUNIT_CASE(if_args_test), KUNIT_CASE(sep_comma_example), {} }; diff --git 
a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 2294cf89f3e1..2278e589a493 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -185,8 +185,7 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_tile(tile, xe, id) { /* For igfx run only for primary tile */ if (!IS_DGFX(xe) && id > 0) @@ -194,8 +193,6 @@ static int ccs_test_run_device(struct xe_device *xe) ccs_test_run_tile(xe, tile, test); } - xe_pm_runtime_put(xe); - return 0; } @@ -356,13 +353,10 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_tile(tile, xe, id) evict_test_run_tile(xe, tile, test); - xe_pm_runtime_put(xe); - return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 5df98de5ba3c..954b6b911ea0 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -266,7 +266,7 @@ static int dma_buf_run_device(struct xe_device *xe) const struct dma_buf_test_params *params; struct kunit *test = kunit_get_current_test(); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); for (params = test_params; params->mem_mask; ++params) { struct dma_buf_test_params p = *params; @@ -274,7 +274,6 @@ static int dma_buf_run_device(struct xe_device *xe) test->priv = &p; xe_test_dmabuf_import_same_driver(xe); } - xe_pm_runtime_put(xe); /* A non-zero return would halt iteration over driver devices */ return 0; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 5904d658d1f2..34e2f0f4631f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -344,8 +344,7 @@ static int migrate_test_run_device(struct xe_device *xe) struct xe_tile *tile; int id; - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; struct drm_exec *exec = XE_VALIDATION_OPT_OUT; @@ -356,8 +355,6 @@ static int migrate_test_run_device(struct xe_device *xe) xe_vm_unlock(m->q->vm); } - xe_pm_runtime_put(xe); - return 0; } @@ -759,13 +756,10 @@ static int validate_ccs_test_run_device(struct xe_device *xe) return 0; } - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_tile(tile, xe, id) validate_ccs_test_run_tile(xe, tile, test); - xe_pm_runtime_put(xe); - return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index 6bb278167aaf..daf3c6836c75 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -43,14 +43,12 @@ static void read_l3cc_table(struct xe_gt *gt, { struct kunit *test = kunit_get_current_test(); u32 l3cc, l3cc_expected; - unsigned int fw_ref, i; + unsigned int i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) KUNIT_FAIL_AND_ABORT(test, "Forcewake Failed.\n"); - } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { @@ -74,7 +72,6 @@ static void read_l3cc_table(struct xe_gt *gt, KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, "l3cc idx=%u has incorrect val.\n", i); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void read_mocs_table(struct xe_gt *gt, @@ 
-82,14 +79,14 @@ static void read_mocs_table(struct xe_gt *gt, { struct kunit *test = kunit_get_current_test(); u32 mocs, mocs_expected; - unsigned int fw_ref, i; + unsigned int i; u32 reg_val; KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, "Unused entries index should have been defined\n"); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_NE_MSG(test, fw_ref.domains, 0, "Forcewake Failed.\n"); for (i = 0; i < info->num_mocs_regs; i++) { if (regs_are_mcr(gt)) @@ -106,8 +103,6 @@ static void read_mocs_table(struct xe_gt *gt, KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs, "mocs reg 0x%x has incorrect val.\n", i); } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } static int mocs_kernel_test_run_device(struct xe_device *xe) @@ -120,8 +115,7 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) unsigned int flags; int id; - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); if (flags & HAS_GLOBAL_MOCS) @@ -130,8 +124,6 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) read_l3cc_table(gt, &mocs.table); } - xe_pm_runtime_put(xe); - return 0; } @@ -155,8 +147,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe) int id; struct kunit *test = kunit_get_current_test(); - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); kunit_info(test, "mocs_reset_test before reset\n"); @@ -174,8 +165,6 @@ static int mocs_reset_test_run_device(struct xe_device *xe) read_l3cc_table(gt, &mocs.table); } - xe_pm_runtime_put(xe); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h index 4dbc7e53c624..f550b5e3b993 100644 --- a/drivers/gpu/drm/xe/xe_args.h +++ b/drivers/gpu/drm/xe/xe_args.h @@ -121,6 +121,33 @@ #define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST_ARG(args)) #define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args)) +/** + * IF_ARGS() - Make selection based on optional argument list. + * @then: token to return if arguments are present + * @else: token to return if arguments are empty + * @...: arguments to check (optional) + * + * This macro allows to select a token based on the presence of the argument list. + * + * Example: + * + * #define foo X, Y + * #define bar IF_ARGS(Z, Q, foo) + * #define buz IF_ARGS(Z, Q, DROP_FIRST_ARG(FIRST_ARG(foo))) + * + * With above definitions bar expands to Z while buz expands to Q. + */ +#if defined(CONFIG_CC_IS_CLANG) || GCC_VERSION >= 100100 +#define IF_ARGS(then, else, ...) FIRST_ARG(__VA_OPT__(then,) else) +#else +#define IF_ARGS(then, else, ...) _IF_ARGS(then, else, CALL_ARGS(FIRST_ARG, __VA_ARGS__)) +#define _IF_ARGS(then, else, ...) __IF_ARGS(then, else, CALL_ARGS(COUNT_ARGS, __VA_ARGS__)) +#define __IF_ARGS(then, else, n) ___IF_ARGS(then, else, CALL_ARGS(CONCATENATE, ___IF_ARG, n)) +#define ___IF_ARGS(then, else, if) CALL_ARGS(if, then, else) +#define ___IF_ARG1(then, else) then +#define ___IF_ARG0(then, else) else +#endif + /** * ARGS_SEP_COMMA - Definition of a comma character. * diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bf4ee976b680..8b6474cd3eaf 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -516,8 +516,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, * non-coherent and require a CPU:WC mapping. 
*/ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || - (xe->info.graphics_verx100 >= 1270 && - bo->flags & XE_BO_FLAG_PAGETABLE)) + (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE)) caching = ttm_write_combined; } @@ -2026,13 +2025,9 @@ static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, struct ttm_buffer_object *ttm_bo = vma->vm_private_data; struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_device *xe = xe_bo_device(bo); - int ret; - xe_pm_runtime_get(xe); - ret = ttm_bo_vm_access(vma, addr, buf, len, write); - xe_pm_runtime_put(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return ttm_bo_vm_access(vma, addr, buf, len, write); } /** @@ -3176,7 +3171,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->flags & ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | DRM_XE_GEM_CREATE_FLAG_SCANOUT | - DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM))) + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | + DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION))) return -EINVAL; if (XE_IOCTL_DBG(xe, args->handle)) @@ -3198,6 +3194,12 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) bo_flags |= XE_BO_FLAG_SCANOUT; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) { + if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20)) + return -EOPNOTSUPP; + bo_flags |= XE_BO_FLAG_NO_COMPRESSION; + } + bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); /* CCS formats need physical placement at a 64K alignment in VRAM. */ @@ -3519,8 +3521,12 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) * Compression implies coh_none, therefore we know for sure that WB * memory can't currently use compression, which is likely one of the * common cases. + * Additionally, userspace may explicitly request no compression via the + * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also disable + * CCS usage. 
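From userspace, opting a buffer out of compression is just another flag on the GEM create ioctl; the kernel side above rejects it on pre-Xe2 hardware with -EOPNOTSUPP. A rough sketch, assuming the uapi header path and the region-mask handling shown here are placeholders chosen by the caller:

#include <errno.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>	/* uapi header; install path may vary */

/* Illustrative only: vram_region_mask comes from DRM_XE_DEVICE_QUERY_MEM_REGIONS. */
static int create_uncompressed_bo(int fd, __u64 size, __u32 vram_region_mask,
				  __u32 *handle)
{
	struct drm_xe_gem_create create = {
		.size = size,
		.placement = vram_region_mask,
		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
		.flags = DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION,
	};

	if (ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
		return -errno;	/* -EOPNOTSUPP on pre-Xe2 hardware */

	*handle = create.handle;
	return 0;
}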
*/ - if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB || + bo->flags & XE_BO_FLAG_NO_COMPRESSION) return false; return true; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 911d5b90461a..8ab4474129c3 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -50,6 +50,7 @@ #define XE_BO_FLAG_GGTT3 BIT(23) #define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) #define XE_BO_FLAG_FORCE_USER_VRAM BIT(25) +#define XE_BO_FLAG_NO_COMPRESSION BIT(26) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index e91da9589c5f..0907868b32d6 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -68,7 +68,7 @@ static int info(struct seq_file *m, void *data) struct xe_gt *gt; u8 id; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); @@ -93,9 +93,10 @@ static int info(struct seq_file *m, void *data) xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); drm_printf(&p, "gt%d engine_mask 0x%llx\n", id, gt->info.engine_mask); + drm_printf(&p, "gt%d multi_queue_engine_class_mask 0x%x\n", id, + gt->info.multi_queue_engine_class_mask); } - xe_pm_runtime_put(xe); return 0; } @@ -110,9 +111,8 @@ static int sriov_info(struct seq_file *m, void *data) static int workarounds(struct xe_device *xe, struct drm_printer *p) { - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); xe_wa_device_dump(xe, p); - xe_pm_runtime_put(xe); return 0; } @@ -134,7 +134,7 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) xe = node_to_xe(m->private); p = drm_seq_file_printer(m); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); mmio = xe_root_tile_mmio(xe); static const struct { u32 offset; @@ -151,7 +151,6 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) for (int i = 0; i < ARRAY_SIZE(residencies); i++) read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); - xe_pm_runtime_put(xe); return 0; } @@ -163,7 +162,7 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) xe = node_to_xe(m->private); p = drm_seq_file_printer(m); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); mmio = xe_root_tile_mmio(xe); static const struct { @@ -178,7 +177,6 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) for (int i = 0; i < ARRAY_SIZE(residencies); i++) read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); - xe_pm_runtime_put(xe); return 0; } @@ -277,16 +275,14 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, xe->wedged.mode = wedged_mode; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); for_each_gt(gt, xe, id) { ret = xe_guc_ads_scheduler_policy_toggle_reset(>->uc.guc.ads); if (ret) { xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. 
GuC may still cause engine reset even with wedged_mode=2\n"); - xe_pm_runtime_put(xe); return -EIO; } } - xe_pm_runtime_put(xe); return size; } @@ -297,6 +293,39 @@ static const struct file_operations wedged_mode_fops = { .write = wedged_mode_set, }; +static ssize_t page_reclaim_hw_assist_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[8]; + int len; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->info.has_page_reclaim_hw_assist); + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t page_reclaim_hw_assist_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + bool val; + ssize_t ret; + + ret = kstrtobool_from_user(ubuf, size, &val); + if (ret) + return ret; + + xe->info.has_page_reclaim_hw_assist = val; + + return size; +} + +static const struct file_operations page_reclaim_hw_assist_fops = { + .owner = THIS_MODULE, + .read = page_reclaim_hw_assist_show, + .write = page_reclaim_hw_assist_set, +}; + static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf, size_t size, loff_t *pos) { @@ -332,6 +361,74 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = { .write = atomic_svm_timeslice_ms_set, }; +static ssize_t min_run_period_lr_ms_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->min_run_period_lr_ms); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t min_run_period_lr_ms_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + u32 min_run_period_lr_ms; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, 0, &min_run_period_lr_ms); + if (ret) + return ret; + + xe->min_run_period_lr_ms = min_run_period_lr_ms; + + return size; +} + +static const struct file_operations min_run_period_lr_ms_fops = { + .owner = THIS_MODULE, + .read = min_run_period_lr_ms_show, + .write = min_run_period_lr_ms_set, +}; + +static ssize_t min_run_period_pf_ms_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + char buf[32]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", xe->min_run_period_pf_ms); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t min_run_period_pf_ms_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + u32 min_run_period_pf_ms; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, 0, &min_run_period_pf_ms); + if (ret) + return ret; + + xe->min_run_period_pf_ms = min_run_period_pf_ms; + + return size; +} + +static const struct file_operations min_run_period_pf_ms_fops = { + .owner = THIS_MODULE, + .read = min_run_period_pf_ms_show, + .write = min_run_period_pf_ms_set, +}; + static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf, size_t size, loff_t *pos) { @@ -375,7 +472,6 @@ void xe_debugfs_register(struct xe_device *xe) struct ttm_resource_manager *man; struct xe_tile *tile; struct xe_gt *gt; - u32 mem_type; u8 tile_id; u8 id; @@ -400,19 +496,22 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, &atomic_svm_timeslice_ms_fops); + 
debugfs_create_file("min_run_period_lr_ms", 0600, root, xe, + &min_run_period_lr_ms_fops); + + debugfs_create_file("min_run_period_pf_ms", 0600, root, xe, + &min_run_period_pf_ms_fops); + debugfs_create_file("disable_late_binding", 0600, root, xe, &disable_late_binding_fops); - for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { - man = ttm_manager_type(bdev, mem_type); - - if (man) { - char name[16]; - - snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0); - ttm_resource_manager_create_debugfs(man, root, name); - } - } + /* + * Don't expose page reclaim configuration file if not supported by the + * hardware initially. + */ + if (xe->info.has_page_reclaim_hw_assist) + debugfs_create_file("page_reclaim_hw_assist", 0600, root, xe, + &page_reclaim_hw_assist_fops); man = ttm_manager_type(bdev, XE_PL_TT); ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d444eda65ca6..7263c2a5f3a8 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -276,7 +276,6 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); struct xe_device *xe = coredump_to_xe(coredump); - unsigned int fw_ref; /* * NB: Despite passing a GFP_ flags parameter here, more allocations are done @@ -287,15 +286,15 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_devcoredump_read, xe_devcoredump_free, XE_COREDUMP_TIMEOUT_JIFFIES); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); + xe_with_force_wake(fw_ref, gt_to_fw(ss->gt), XE_FORCEWAKE_ALL) { + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + } ss->read.chunk_position = 0; @@ -306,7 +305,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, GFP_USER); if (!ss->read.buffer) - goto put_pm; + return; __xe_devcoredump_read(ss->read.buffer, XE_DEVCOREDUMP_CHUNK_MAX, @@ -314,15 +313,12 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) } else { ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); if (!ss->read.buffer) - goto put_pm; + return; __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, coredump); xe_devcoredump_snapshot_free(ss); } - -put_pm: - xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -332,7 +328,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; ss->snapshot_time = ktime_get_real(); @@ -348,10 +343,10 @@ static void devcoredump_snapshot(struct xe_devcoredump 
*coredump, ss->gt = q->gt; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); - cookie = dma_fence_begin_signalling(); - /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + + cookie = dma_fence_begin_signalling(); ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); @@ -364,7 +359,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, queue_work(system_unbound_wq, &ss->work); - xe_force_wake_put(gt_to_fw(q->gt), fw_ref); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index cf29e259861f..00afc84a8683 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -166,7 +166,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); /* * No need for exec_queue.lock here as there is no contention for it @@ -177,15 +177,18 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_for_each(&xef->exec_queue.xa, idx, q) { if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); + xe_exec_queue_put(q); } xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); xe_file_put(xef); - - xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { @@ -209,6 +212,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -220,10 +225,10 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (xe_device_wedged(xe)) return -ECANCELED; - ret = xe_pm_runtime_get_ioctl(xe); + ACQUIRE(xe_pm_runtime_ioctl, pm)(xe); + ret = ACQUIRE_ERR(xe_pm_runtime_ioctl, &pm); if (ret >= 0) ret = drm_ioctl(file, cmd, arg); - xe_pm_runtime_put(xe); return ret; } @@ -238,10 +243,10 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo if (xe_device_wedged(xe)) return -ECANCELED; - ret = xe_pm_runtime_get_ioctl(xe); + ACQUIRE(xe_pm_runtime_ioctl, pm)(xe); + ret = ACQUIRE_ERR(xe_pm_runtime_ioctl, &pm); if (ret >= 0) ret = drm_compat_ioctl(file, cmd, arg); - xe_pm_runtime_put(xe); return ret; } @@ -455,6 +460,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.revid = pdev->revision; xe->info.force_execlist = xe_modparam.force_execlist; xe->atomic_svm_timeslice_ms = 5; + xe->min_run_period_lr_ms = 5; err = xe_irq_init(xe); if (err) @@ -775,7 +781,6 @@ ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ static int probe_has_flat_ccs(struct xe_device *xe) { struct xe_gt *gt; - unsigned int fw_ref; u32 reg; /* Always enabled/disabled, no runtime check to do */ @@ -786,8 +791,8 @@ static int probe_has_flat_ccs(struct xe_device *xe) if (!gt) return 0; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if 
(!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); @@ -797,11 +802,64 @@ static int probe_has_flat_ccs(struct xe_device *xe) drm_dbg(&xe->drm, "Flat CCS has been disabled in bios, May lead to performance impact"); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return 0; } +/* + * Detect if the driver is being run on pre-production hardware. We don't + * keep workarounds for pre-production hardware long term, so print an + * error and add taint if we're being loaded on a pre-production platform + * for which the pre-prod workarounds have already been removed. + * + * The general policy is that we'll remove any workarounds that only apply to + * pre-production hardware around the time force_probe restrictions are lifted + * for a platform of the next major IP generation (for example, Xe2 pre-prod + * workarounds should be removed around the time the first Xe3 platforms have + * force_probe lifted). + */ +static void detect_preproduction_hw(struct xe_device *xe) +{ + struct xe_gt *gt; + int id; + + /* + * SR-IOV VFs don't have access to the FUSE2 register, so we can't + * check pre-production status there. But the host OS will notice + * and report the pre-production status, which should be enough to + * help us catch mistaken use of pre-production hardware. + */ + if (IS_SRIOV_VF(xe)) + return; + + /* + * The "SW_CAP" fuse contains a bit indicating whether the device is a + * production or pre-production device. This fuse is reflected through + * the GT "FUSE2" register, even though the contents of the fuse are + * not GT-specific. Every GT's reflection of this fuse should show the + * same value, so we'll just use the first available GT for lookup. 
+ */ + for_each_gt(gt, xe, id) + break; + + if (!gt) + return; + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) { + xe_gt_err(gt, "Forcewake failure; cannot determine production/pre-production hw status.\n"); + return; + } + + if (xe_mmio_read32(>->mmio, FUSE2) & PRODUCTION_HW) + return; + + xe_info(xe, "Pre-production hardware detected.\n"); + if (!xe->info.has_pre_prod_wa) { + xe_err(xe, "Pre-production workarounds for this platform have already been removed.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -972,6 +1030,8 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + detect_preproduction_hw(xe); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -1034,7 +1094,6 @@ void xe_device_wmb(struct xe_device *xe) */ static void tdf_request_sync(struct xe_device *xe) { - unsigned int fw_ref; struct xe_gt *gt; u8 id; @@ -1042,8 +1101,8 @@ static void tdf_request_sync(struct xe_device *xe) if (xe_gt_is_media_type(gt)) continue; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); @@ -1058,15 +1117,12 @@ static void tdf_request_sync(struct xe_device *xe) if (xe_mmio_wait32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0, 300, NULL, false)) xe_gt_err_once(gt, "TD flush timeout\n"); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } } void xe_device_l2_flush(struct xe_device *xe) { struct xe_gt *gt; - unsigned int fw_ref; gt = xe_root_mmio_gt(xe); if (!gt) @@ -1075,8 +1131,8 @@ void xe_device_l2_flush(struct xe_device *xe) if (!XE_GT_WA(gt, 16023588340)) return; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; spin_lock(>->global_invl_lock); @@ -1086,8 +1142,6 @@ void xe_device_l2_flush(struct xe_device *xe) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(>->global_invl_lock); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 32cc6323b7f6..6604b89330d5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -172,6 +172,11 @@ static inline bool xe_device_has_lmtt(struct xe_device *xe) return IS_DGFX(xe); } +static inline bool xe_device_has_mert(struct xe_device *xe) +{ + return xe->info.has_mert; +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index ec9c06b06fb5..a73e0e957cb0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -57,9 +57,8 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); - xe_pm_runtime_put(xe); return ret ?: count; } @@ -84,33 +83,31 @@ lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, c u16 major = 0, minor = 0, hotfix = 0, build = 0; int ret; - xe_pm_runtime_get(xe); + 
guard(xe_pm_runtime)(xe); ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); if (ret) - goto out; + return ret; if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), &ver_low, NULL); if (ret) - goto out; + return ret; ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), &ver_high, NULL); if (ret) - goto out; + return ret; major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); } -out: - xe_pm_runtime_put(xe); - return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); + return sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); } static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); @@ -123,33 +120,31 @@ lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *a u16 major = 0, minor = 0, hotfix = 0, build = 0; int ret; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); if (ret) - goto out; + return ret; if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), &ver_low, NULL); if (ret) - goto out; + return ret; ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), &ver_high, NULL); if (ret) - goto out; + return ret; major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); } -out: - xe_pm_runtime_put(xe); - return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); + return sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); } static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); @@ -233,9 +228,8 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at struct xe_device *xe = pdev_to_xe_device(pdev); u32 cap, val; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP); - xe_pm_runtime_put(xe); cap = REG_FIELD_GET(LINK_DOWNGRADE, val); return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE); @@ -251,11 +245,10 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att u32 val = 0; int ret; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = xe_pcode_read(xe_device_get_root_tile(xe), PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0), &val, NULL); - xe_pm_runtime_put(xe); return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val)); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6ce3247d1bd8..dad355fec50c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_late_bind_fw_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_mert.h" #include "xe_oa_types.h" #include "xe_pagefault_types.h" #include "xe_platform_types.h" @@ -183,6 +184,13 @@ struct xe_tile { * Media GT shares a pool with its primary GT. */ struct xe_sa_manager *kernel_bb_pool; + + /** + * @mem.reclaim_pool: Pool for PRLs allocated. + * + * Only main GT has page reclaim list allocations. 
+ */ + struct xe_sa_manager *reclaim_pool; } mem; /** @sriov: tile level virtualization data */ @@ -219,6 +227,9 @@ struct xe_tile { /** @debugfs: debugfs directory associated with this tile */ struct dentry *debugfs; + + /** @mert: MERT-related data */ + struct xe_mert mert; }; /** @@ -285,6 +296,8 @@ struct xe_device { u8 has_asid:1; /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ u8 has_atomic_enable_pte_bit:1; + /** @info.has_cached_pt: Supports caching pagetable */ + u8 has_cached_pt:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; /** @info.has_fan_control: Device supports fan control */ @@ -297,6 +310,8 @@ struct xe_device { u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @info.has_i2c: Device has I2C controller */ + u8 has_i2c:1; /** @info.has_late_bind: Device has firmware late binding support */ u8 has_late_bind:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ @@ -307,6 +322,12 @@ struct xe_device { u8 has_mbx_power_limits:1; /** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */ u8 has_mem_copy_instr:1; + /** @info.has_mert: Device has standalone MERT */ + u8 has_mert:1; + /** @info.has_page_reclaim_hw_assist: Device supports page reclamation feature */ + u8 has_page_reclaim_hw_assist:1; + /** @info.has_pre_prod_wa: Pre-production workarounds still present in driver */ + u8 has_pre_prod_wa:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; /** @info.has_range_tlb_inval: Has range based TLB invalidations */ @@ -605,6 +626,12 @@ struct xe_device { /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ u32 atomic_svm_timeslice_ms; + /** @min_run_period_lr_ms: LR VM (preempt fence mode) timeslice */ + u32 min_run_period_lr_ms; + + /** @min_run_period_pf_ms: LR VM (page fault mode) timeslice */ + u32 min_run_period_pf_ms; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index f931ff9b1ec0..2787bbb36141 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -285,32 +285,31 @@ static struct xe_hw_engine *any_engine(struct xe_device *xe) return NULL; } -static bool force_wake_get_any_engine(struct xe_device *xe, - struct xe_hw_engine **phwe, - unsigned int *pfw_ref) +/* + * Pick any engine and grab its forcewake. On error phwe will be NULL and + * the returned forcewake reference will be invalid. Callers should check + * phwe against NULL. 
+ */ +static struct xe_force_wake_ref force_wake_get_any_engine(struct xe_device *xe, + struct xe_hw_engine **phwe) { enum xe_force_wake_domains domain; - unsigned int fw_ref; + struct xe_force_wake_ref fw_ref = {}; struct xe_hw_engine *hwe; - struct xe_force_wake *fw; + + *phwe = NULL; hwe = any_engine(xe); if (!hwe) - return false; + return fw_ref; /* will be invalid */ domain = xe_hw_engine_to_fw_domain(hwe); - fw = gt_to_fw(hwe->gt); - fw_ref = xe_force_wake_get(fw, domain); - if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { - xe_force_wake_put(fw, fw_ref); - return false; - } + fw_ref = xe_force_wake_constructor(gt_to_fw(hwe->gt), domain); + if (xe_force_wake_ref_has_domain(fw_ref.domains, domain)) + *phwe = hwe; /* valid forcewake */ - *phwe = hwe; - *pfw_ref = fw_ref; - - return true; + return fw_ref; } static void show_run_ticks(struct drm_printer *p, struct drm_file *file) @@ -322,7 +321,6 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) struct xe_hw_engine *hwe; struct xe_exec_queue *q; u64 gpu_timestamp; - unsigned int fw_ref; /* * RING_TIMESTAMP registers are inaccessible in VF mode. @@ -339,29 +337,26 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) wait_var_event(&xef->exec_queue.pending_removal, !atomic_read(&xef->exec_queue.pending_removal)); - xe_pm_runtime_get(xe); - if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) { - xe_pm_runtime_put(xe); - return; - } + scoped_guard(xe_pm_runtime, xe) { + CLASS(xe_force_wake_release_only, fw_ref)(force_wake_get_any_engine(xe, &hwe)); + if (!hwe) + return; - /* Accumulate all the exec queues from this client */ - mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { - xe_exec_queue_get(q); + /* Accumulate all the exec queues from this client */ + mutex_lock(&xef->exec_queue.lock); + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); - xe_exec_queue_update_run_ticks(q); - - mutex_lock(&xef->exec_queue.lock); - xe_exec_queue_put(q); + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); } - mutex_unlock(&xef->exec_queue.lock); - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - - xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref); - xe_pm_runtime_put(xe); for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { const char *class_name; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index fd9480031750..730a5c9c2637 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -121,7 +121,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs, num_ufence = 0; + u32 i, num_syncs, num_in_sync = 0, num_ufence = 0; struct xe_validation_ctx ctx; struct xe_sched_job *job; struct xe_vm *vm; @@ -183,6 +183,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (xe_sync_is_ufence(&syncs[num_syncs])) num_ufence++; + + if (!num_in_sync && xe_sync_needs_wait(&syncs[num_syncs])) + num_in_sync++; } if (XE_IOCTL_DBG(xe, num_ufence > 1)) { @@ -203,7 +206,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) mode = xe_hw_engine_group_find_exec_mode(q); if (mode == EXEC_MODE_DMA_FENCE) { - err = 
xe_hw_engine_group_get_mode(group, mode, &previous_mode); + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode, + syncs, num_in_sync ? + num_syncs : 0); if (err) goto err_syncs; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8724f8de67e2..41023a464480 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -13,6 +13,7 @@ #include #include +#include "xe_bo.h" #include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" @@ -53,6 +54,54 @@ * the ring operations the different engine classes support. */ +/** + * DOC: Multi Queue Group + * + * Multi Queue Group is another mode of execution supported by the compute + * and blitter copy command streamers (CCS and BCS, respectively). It is + * an enhancement of the existing hardware architecture and leverages the + * same submission model. It enables support for efficient, parallel + * execution of multiple queues within a single shared context. The multi + * queue group functionality is only supported with GuC submission backend. + * All the queues of a group must use the same address space (VM). + * + * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE execution queue property + * supports creating a multi queue group and adding queues to a queue group. + * + * The XE_EXEC_QUEUE_CREATE ioctl call with above property with value field + * set to DRM_XE_MULTI_GROUP_CREATE, will create a new multi queue group with + * the queue being created as the primary queue (aka q0) of the group. To add + * secondary queues to the group, they need to be created with the above + * property with id of the primary queue as the value. The properties of + * the primary queue (like priority, time slice) applies to the whole group. + * So, these properties can't be set for secondary queues of a group. + * + * The hardware does not support removing a queue from a multi-queue group. + * However, queues can be dynamically added to the group. A group can have + * up to 64 queues. To support this, XeKMD holds references to LRCs of the + * queues even after the queues are destroyed by the user until the whole + * group is destroyed. The secondary queues hold a reference to the primary + * queue thus preventing the group from being destroyed when user destroys + * the primary queue. Once the primary queue is destroyed, secondary queues + * can't be added to the queue group, but they can continue to submit the + * jobs if the DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag is set during the multi + * queue group creation. + * + * The queues of a multi queue group can set their priority within the group + * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property. + * This multi queue priority can also be set dynamically through the + * XE_EXEC_QUEUE_SET_PROPERTY ioctl. This is the only other property + * supported by the secondary queues of a multi queue group, other than + * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE. + * + * When GuC reports an error on any of the queues of a multi queue group, + * the queue cleanup mechanism is invoked for all the queues of the group + * as hardware cannot make progress on the multi queue context. + * + * Refer :ref:`multi-queue-group-guc-interface` for multi queue group GuC + * interface. 
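To make the creation flow above concrete, a minimal userspace sketch follows. It is illustrative only: the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP / DRM_XE_MULTI_GROUP_* names are taken from the property table added later in this patch, DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY and struct drm_xe_ext_set_property are assumed to be the existing Xe uAPI extension mechanism, and xe_queue_create_with_ext() is a hypothetical helper (not part of the uAPI) that chains the extension into struct drm_xe_exec_queue_create and issues DRM_IOCTL_XE_EXEC_QUEUE_CREATE.

	#include "xe_drm.h"	/* Xe uAPI header; include path is illustrative */

	/* Hypothetical wrapper: fills drm_xe_exec_queue_create, chains @ext, returns exec_queue_id */
	__u32 xe_queue_create_with_ext(int fd, __u32 vm_id, struct drm_xe_ext_set_property *ext);

	static void create_multi_queue_group(int fd, __u32 vm_id)
	{
		/* q0: creating the primary queue with MULTI_GROUP = CREATE also creates the group */
		struct drm_xe_ext_set_property grp = {
			.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
			.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
			/* DRM_XE_MULTI_GROUP_KEEP_ACTIVE may be ORed in, except in preempt fence mode */
			.value = DRM_XE_MULTI_GROUP_CREATE,
		};
		__u32 primary_id = xe_queue_create_with_ext(fd, vm_id, &grp);

		/* Secondary queues join the group by passing the primary queue id as the value */
		struct drm_xe_ext_set_property join = {
			.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
			.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
			.value = primary_id,
		};
		__u32 secondary_id = xe_queue_create_with_ext(fd, vm_id, &join);

		(void)secondary_id;	/* both queues submit work through the usual exec path */
	}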
+ */ + enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, XE_EXEC_QUEUE_TIMESLICE = 1, @@ -61,7 +110,35 @@ enum xe_exec_queue_sched_prop { }; static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number); + u64 extensions); + +static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + unsigned long idx; + + if (xe_exec_queue_is_multi_queue_secondary(q)) { + /* + * Put pairs with get from xe_exec_queue_lookup() call + * in xe_exec_queue_group_validate(). + */ + xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q)); + return; + } + + if (!group) + return; + + /* Primary queue cleanup */ + xa_for_each(&group->xa, idx, lrc) + xe_lrc_put(lrc); + + xa_destroy(&group->xa); + mutex_destroy(&group->list_lock); + xe_bo_unpin_map_no_vm(group->cgp_bo); + kfree(group); +} static void __xe_exec_queue_free(struct xe_exec_queue *q) { @@ -73,12 +150,17 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + + if (xe_exec_queue_is_multi_queue(q)) + xe_exec_queue_group_cleanup(q); + if (q->vm) xe_vm_put(q->vm); if (q->xef) xe_file_put(q->xef); + kvfree(q->replay_state); kfree(q); } @@ -147,6 +229,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); + q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -175,7 +258,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, * may set q->usm, must come before xe_lrc_create(), * may overwrite q->sched_props, must come before q->ops->init() */ - err = exec_queue_user_extensions(xe, q, extensions, 0); + err = exec_queue_user_extensions(xe, q, extensions); if (err) { __xe_exec_queue_free(q); return ERR_PTR(err); @@ -225,8 +308,8 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) struct xe_lrc *lrc; xe_gt_sriov_vf_wait_valid_ggtt(q->gt); - lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(), - q->msix_vec, flags); + lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, + xe_lrc_ring_size(), q->msix_vec, flags); if (IS_ERR(lrc)) { err = PTR_ERR(lrc); goto err_lrc; @@ -383,6 +466,26 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); +static void xe_exec_queue_group_kill(struct kref *ref) +{ + struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group, + kill_refcount); + xe_exec_queue_kill(group->primary); +} + +static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group) +{ + kref_get(&group->kill_refcount); +} + +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group) +{ + if (!group) + return; + + kref_put(&group->kill_refcount, xe_exec_queue_group_kill); +} + void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); @@ -567,6 +670,217 @@ exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); } +static int exec_queue_set_hang_replay_state(struct xe_device *xe, + struct xe_exec_queue *q, + u64 value) +{ + size_t size = 
xe_gt_lrc_hang_replay_size(q->gt, q->class); + u64 __user *address = u64_to_user_ptr(value); + void *ptr; + + ptr = vmemdup_user(address, size); + if (XE_IOCTL_DBG(xe, IS_ERR(ptr))) + return PTR_ERR(ptr); + + q->replay_state = ptr; + + return 0; +} + +static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + struct xe_exec_queue_group *group; + struct xe_bo *bo; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_GGTT, false); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n", + PTR_ERR(bo)); + kfree(group); + return PTR_ERR(bo); + } + + xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K); + + group->primary = q; + group->cgp_bo = bo; + INIT_LIST_HEAD(&group->list); + kref_init(&group->kill_refcount); + xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); + mutex_init(&group->list_lock); + q->multi_queue.group = group; + + /* group->list_lock is used in submission backend */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&group->list_lock); + fs_reclaim_release(GFP_KERNEL); + } + + return 0; +} + +static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) +{ + return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); +} + +static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, + u32 primary_id) +{ + struct xe_exec_queue_group *group; + struct xe_exec_queue *primary; + int ret; + + /* + * Get from below xe_exec_queue_lookup() pairs with put + * in xe_exec_queue_group_cleanup(). 
+ */ + primary = xe_exec_queue_lookup(q->vm->xef, primary_id); + if (XE_IOCTL_DBG(xe, !primary)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) || + XE_IOCTL_DBG(xe, q->vm != primary->vm) || + XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) { + ret = -EINVAL; + goto put_primary; + } + + group = primary->multi_queue.group; + q->multi_queue.valid = true; + q->multi_queue.group = group; + + return 0; +put_primary: + xe_exec_queue_put(primary); + return ret; +} + +#define XE_MAX_GROUP_SIZE 64 +static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 pos; + int err; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + /* Primary queue holds a reference to LRCs of all secondary queues */ + err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]), + XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL); + if (XE_IOCTL_DBG(xe, err)) { + xe_lrc_put(q->lrc[0]); + + /* It is invalid if queue group limit is exceeded */ + if (err == -EBUSY) + err = -EINVAL; + + return err; + } + + q->multi_queue.pos = pos; + + if (group->primary->multi_queue.keep_active) { + xe_exec_queue_group_kill_get(group); + q->multi_queue.keep_active = true; + } + + return 0; +} + +static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + lrc = xa_erase(&group->xa, q->multi_queue.pos); + xe_assert(xe, lrc); + xe_lrc_put(lrc); + + if (q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(group); + q->multi_queue.keep_active = false; + } +} + +static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) + return -ENODEV; + + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !q->vm->xef)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q))) + return -EINVAL; + + if (value & DRM_XE_MULTI_GROUP_CREATE) { + if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE | + DRM_XE_MULTI_GROUP_KEEP_ACTIVE))) + return -EINVAL; + + /* + * KEEP_ACTIVE is not supported in preempt fence mode as in that mode, + * VM_DESTROY ioctl expects all exec queues of that VM are already killed. 
+ */ + if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) && + xe_vm_in_preempt_fence_mode(q->vm))) + return -EINVAL; + + q->multi_queue.valid = true; + q->multi_queue.is_primary = true; + q->multi_queue.pos = 0; + if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) + q->multi_queue.keep_active = true; + + return 0; + } + + /* While adding secondary queues, the upper 32 bits must be 0 */ + if (XE_IOCTL_DBG(xe, value & (~0ull << 32))) + return -EINVAL; + + return xe_exec_queue_group_validate(xe, q, value); +} + +static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH)) + return -EINVAL; + + /* For queue creation time (!q->xef) setting, just store the priority value */ + if (!q->xef) { + q->multi_queue.priority = value; + return 0; + } + + if (!xe_exec_queue_is_multi_queue(q)) + return -EINVAL; + + return q->ops->set_multi_queue_priority(q, value); +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -575,11 +889,76 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, + [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] = + exec_queue_set_multi_queue_priority, }; +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_set_property *args = data; + struct xe_exec_queue *q; + int ret; + u32 idx; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->property != + DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + idx = array_index_nospec(args->property, + ARRAY_SIZE(exec_queue_set_property_funcs)); + ret = exec_queue_set_property_funcs[idx](xe, q, args->value); + if (XE_IOCTL_DBG(xe, ret)) + goto err_post_lookup; + + xe_exec_queue_put(q); + return 0; + + err_post_lookup: + xe_exec_queue_put(q); + return ret; +} + +static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties) +{ + u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) | + BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY); + + /* + * Only MULTI_QUEUE_PRIORITY property is valid for secondary queues of a + * multi-queue group. 
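Once created, the multi-queue priority is the only property a secondary queue may still change, which is exactly what the new set-property ioctl above accepts. A hedged userspace sketch follows, using the fields read by xe_exec_queue_set_property_ioctl(); the DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY request macro and the numeric priority values (0/1/2 = low/normal/high, mirroring the KMD enum) are assumptions, as the uAPI constants are not visible in this hunk.

	#include <errno.h>
	#include <sys/ioctl.h>
	#include "xe_drm.h"	/* Xe uAPI header; include path is illustrative */

	static int set_multi_queue_priority(int fd, __u32 exec_queue_id, __u64 prio)
	{
		struct drm_xe_exec_queue_set_property args = {
			.exec_queue_id = exec_queue_id,
			.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY,
			.value = prio,	/* 0 = low, 1 = normal, 2 = high */
		};

		/* The KMD returns -EINVAL for non multi-queue queues or out-of-range values */
		return ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY, &args) ? -errno : 0;
	}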
+ */ + if (xe_exec_queue_is_multi_queue_secondary(q) && + properties & ~secondary_queue_valid_props) + return -EINVAL; + + return 0; +} + +static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties) +{ + /* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */ + if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) && + !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP))) + return -EINVAL; + + return 0; +} + static int exec_queue_user_ext_set_property(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension) + u64 extension, u64 *properties) { u64 __user *address = u64_to_user_ptr(extension); struct drm_xe_ext_set_property ext; @@ -595,27 +974,35 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && + ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); if (!exec_queue_set_property_funcs[idx]) return -EINVAL; + *properties |= BIT_ULL(idx); + err = exec_queue_user_ext_check(q, *properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + return exec_queue_set_property_funcs[idx](xe, q, ext.value); } typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension); + u64 extension, u64 *properties); static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; #define MAX_USER_EXTENSIONS 16 -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number) +static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, u64 *properties) { u64 __user *address = u64_to_user_ptr(extensions); struct drm_xe_user_extension ext; @@ -636,13 +1023,36 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue idx = array_index_nospec(ext.name, ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties); if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) - return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number); + return __exec_queue_user_extensions(xe, q, ext.next_extension, + ++ext_number, properties); + + return 0; +} + +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions) +{ + u64 properties = 0; + int err; + + err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + + err = exec_queue_user_ext_check_final(q, properties); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (xe_exec_queue_is_multi_queue_primary(q)) { + err = xe_exec_queue_group_init(xe, q); + if (XE_IOCTL_DBG(xe, err)) + return err; + } return 0; } @@ -798,12 +1208,18 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(q)) return PTR_ERR(q); + if 
(xe_exec_queue_is_multi_queue_secondary(q)) { + err = xe_exec_queue_group_add(xe, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } + if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; + goto delete_queue_group; } if (q->vm && q->hwe->hw_engine_group) { @@ -826,6 +1242,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, kill_exec_queue: xe_exec_queue_kill(q); +delete_queue_group: + if (xe_exec_queue_is_multi_queue_secondary(q)) + xe_exec_queue_group_delete(xe, q); put_exec_queue: xe_exec_queue_put(q); return err; @@ -981,6 +1400,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) q->ops->kill(q); xe_vm_remove_compute_exec_queue(q->vm, q); + + if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(q->multi_queue.group); + q->multi_queue.keep_active = false; + } } int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, @@ -1007,7 +1431,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index fda4d4f9bda8..b5ad975d7e97 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -66,6 +66,55 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q) return q->pxp.type; } +/** + * xe_exec_queue_is_multi_queue() - Whether an exec_queue is part of a queue group. + * @q: The exec_queue + * + * Return: True if the exec_queue is part of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue(struct xe_exec_queue *q) +{ + return q->multi_queue.valid; +} + +/** + * xe_exec_queue_is_multi_queue_primary() - Whether an exec_queue is primary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is primary queue of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue_primary(struct xe_exec_queue *q) +{ + return q->multi_queue.is_primary; +} + +/** + * xe_exec_queue_is_multi_queue_secondary() - Whether an exec_queue is secondary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is secondary queue of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue_secondary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) && !xe_exec_queue_is_multi_queue_primary(q); +} + +/** + * xe_exec_queue_multi_queue_primary() - Get multi queue group's primary queue + * @q: The exec_queue + * + * If @q belongs to a multi queue group, then the primary queue of the group will + * be returned. Otherwise, @q will be returned. + */ +static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) ? 
q->multi_queue.group->primary : q; +} + +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group); + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); @@ -78,6 +127,8 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); @@ -111,4 +162,21 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); +/** + * xe_exec_queue_idle_skip_suspend() - Can exec queue skip suspend + * @q: The exec_queue + * + * If an exec queue is not parallel and is idle, the suspend steps can be + * skipped in the submission backend immediatley signaling the suspend fence. + * Parallel queues cannot skip this step due to limitations in the submission + * backend. + * + * Return: True if exec queue is idle and can skip suspend steps, False + * otherwise + */ +static inline bool xe_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) +{ + return !xe_exec_queue_is_parallel(q) && xe_exec_queue_is_idle(q); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 771ffe35cd0c..67ea5eebf70b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -32,6 +32,44 @@ enum xe_exec_queue_priority { XE_EXEC_QUEUE_PRIORITY_COUNT }; +/** + * enum xe_multi_queue_priority - Multi Queue priority values + * + * The priority values of the queues within the multi queue group. + */ +enum xe_multi_queue_priority { + /** @XE_MULTI_QUEUE_PRIORITY_LOW: Priority low */ + XE_MULTI_QUEUE_PRIORITY_LOW = 0, + /** @XE_MULTI_QUEUE_PRIORITY_NORMAL: Priority normal */ + XE_MULTI_QUEUE_PRIORITY_NORMAL, + /** @XE_MULTI_QUEUE_PRIORITY_HIGH: Priority high */ + XE_MULTI_QUEUE_PRIORITY_HIGH, +}; + +/** + * struct xe_exec_queue_group - Execution multi queue group + * + * Contains multi queue group information. 
+ */ +struct xe_exec_queue_group { + /** @primary: Primary queue of this group */ + struct xe_exec_queue *primary; + /** @cgp_bo: BO for the Context Group Page */ + struct xe_bo *cgp_bo; + /** @xa: xarray to store LRCs */ + struct xarray xa; + /** @list: List of all secondary queues in the group */ + struct list_head list; + /** @list_lock: Secondary queue list lock */ + struct mutex list_lock; + /** @kill_refcount: ref count to kill primary queue */ + struct kref kill_refcount; + /** @sync_pending: CGP_SYNC_DONE g2h response pending */ + bool sync_pending; + /** @banned: Group banned */ + bool banned; +}; + /** * struct xe_exec_queue - Execution queue * @@ -111,6 +149,24 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; + /** @multi_queue: Multi queue information */ + struct { + /** @multi_queue.group: Queue group information */ + struct xe_exec_queue_group *group; + /** @multi_queue.link: Link into group's secondary queues list */ + struct list_head link; + /** @multi_queue.priority: Queue priority within the multi-queue group */ + enum xe_multi_queue_priority priority; + /** @multi_queue.pos: Position of queue within the multi-queue group */ + u8 pos; + /** @multi_queue.valid: Queue belongs to a multi queue group */ + u8 valid:1; + /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ + u8 is_primary:1; + /** @multi_queue.keep_active: Keep the group active after primary is destroyed */ + u8 keep_active:1; + } multi_queue; + /** @sched_props: scheduling properties */ struct { /** @sched_props.timeslice_us: timeslice period in micro-seconds */ @@ -167,6 +223,9 @@ struct xe_exec_queue { /** @ufence_timeline_value: User fence timeline value */ u64 ufence_timeline_value; + /** @replay_state: GPU hang replay state */ + void *replay_state; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; @@ -213,6 +272,9 @@ struct xe_exec_queue_ops { int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us); /** @set_preempt_timeout: Set preemption timeout for exec queue */ int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us); + /** @set_multi_queue_priority: Set multi queue priority */ + int (*set_multi_queue_priority)(struct xe_exec_queue *q, + enum xe_multi_queue_priority priority); /** * @suspend: Suspend exec queue from executing, allowed to be called * multiple times in a row before resume with the caveat that diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 769d05517f93..46c17a18a3f4 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -269,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); + port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index c59a9b330697..76e054f314ee 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -166,6 +166,13 @@ static int domain_sleep_wait(struct xe_gt *gt, * xe_force_wake_ref_has_domain() function. Caller must call * xe_force_wake_put() function to decrease incremented refcounts. * + * When possible, scope-based forcewake (through CLASS(xe_force_wake, ...) or + * xe_with_force_wake()) should be used instead of direct calls to this + * function. 
Direct usage of get/put should only be used when the function + * has goto-based flows that can interfere with scope-based cleanup, or when + * the lifetime of the forcewake reference does not match a specific scope + * (e.g., forcewake obtained in one function and released in a different one). + * * Return: opaque reference to woken domains or zero if none of requested * domains were awake. */ diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 0e3e84bfa51c..1e2198f6a007 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -61,4 +61,44 @@ xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains dom return fw_ref & domain; } +struct xe_force_wake_ref { + struct xe_force_wake *fw; + unsigned int domains; +}; + +static struct xe_force_wake_ref +xe_force_wake_constructor(struct xe_force_wake *fw, unsigned int domains) +{ + struct xe_force_wake_ref fw_ref = { .fw = fw }; + + fw_ref.domains = xe_force_wake_get(fw, domains); + + return fw_ref; +} + +DEFINE_CLASS(xe_force_wake, struct xe_force_wake_ref, + xe_force_wake_put(_T.fw, _T.domains), + xe_force_wake_constructor(fw, domains), + struct xe_force_wake *fw, unsigned int domains); + +/* + * Scoped helper for the forcewake class, using the same trick as scoped_guard() + * to bind the lifetime to the next statement/block. + */ +#define __xe_with_force_wake(ref, fw, domains, done) \ + for (CLASS(xe_force_wake, ref)(fw, domains), *(done) = NULL; \ + !(done); (done) = (void *)1) + +#define xe_with_force_wake(ref, fw, domains) \ + __xe_with_force_wake(ref, fw, domains, __UNIQUE_ID(done)) + +/* + * Used when xe_force_wake_constructor() has already been called by another + * function and the current function is responsible for releasing the forcewake + * reference in all possible cases and error paths. 
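A minimal sketch of the xe_with_force_wake() form may help here, since the scoped helper has no caller in the hunks shown; read_gmd_id_scoped() is a made-up wrapper and the register access simply mirrors the GMD_ID read done elsewhere in this patch. The forcewake reference is dropped automatically when the block is left, on both the success and the timeout path.

	/* Illustrative only; assumes the usual xe_force_wake.h / xe_gt_regs.h includes */
	static u32 read_gmd_id_scoped(struct xe_gt *gt)
	{
		u32 gmdid = 0;

		xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FW_GT) {
			/* fw_ref.domains is zero if the wake request timed out */
			if (fw_ref.domains)
				gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
		}	/* xe_force_wake_put() runs here via the cleanup class */

		return gmdid;
	}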
+ */ +DEFINE_CLASS(xe_force_wake_release_only, struct xe_force_wake_ref, + if (_T.fw) xe_force_wake_put(_T.fw, _T.domains), fw_ref, + struct xe_force_wake_ref fw_ref); + #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ef481b334af4..48ab8b43fcd0 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -396,9 +396,8 @@ static void ggtt_node_remove_work_func(struct work_struct *work) delayed_removal_work); struct xe_device *xe = tile_to_xe(node->ggtt->tile); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ggtt_node_remove(node); - xe_pm_runtime_put(xe); } /** diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index dd69cb834f8e..a3157b0fe791 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -352,7 +352,6 @@ static void gsc_work(struct work_struct *work) struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; u32 actions; int ret; @@ -361,13 +360,12 @@ static void gsc_work(struct work_struct *work) gsc->work_actions = 0; spin_unlock_irq(&gsc->lock); - xe_pm_runtime_get(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + guard(xe_pm_runtime)(xe); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); if (actions & GSC_ACTION_ER_COMPLETE) { - ret = gsc_er_complete(gt); - if (ret) - goto out; + if (gsc_er_complete(gt)) + return; } if (actions & GSC_ACTION_FW_LOAD) { @@ -380,10 +378,6 @@ static void gsc_work(struct work_struct *work) if (actions & GSC_ACTION_SW_PROXY) xe_gsc_proxy_request_handler(gsc); - -out: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); } void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) @@ -615,7 +609,6 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) { struct xe_gt *gt = gsc_to_gt(gsc); struct xe_mmio *mmio = >->mmio; - unsigned int fw_ref; xe_uc_fw_print(&gsc->fw, p); @@ -624,8 +617,8 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) if (!xe_uc_fw_is_enabled(&gsc->fw)) return; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref.domains) return; drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", @@ -635,6 +628,4 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)), xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gsc_debugfs.c b/drivers/gpu/drm/xe/xe_gsc_debugfs.c index 461d7e99c2b3..b13928b50eb9 100644 --- a/drivers/gpu/drm/xe/xe_gsc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gsc_debugfs.c @@ -37,9 +37,8 @@ static int gsc_info(struct seq_file *m, void *data) struct xe_device *xe = gsc_to_xe(gsc); struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); xe_gsc_print_info(gsc, &p); - xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 464282a89eef..e7573a0c5e5d 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -440,22 +440,19 @@ static void xe_gsc_proxy_remove(void *arg) struct xe_gsc *gsc = arg; struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref = 0; if 
(!gsc->proxy.component_added) return; /* disable HECI2 IRQs */ - xe_pm_runtime_get(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (!fw_ref) - xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); + scoped_guard(xe_pm_runtime, xe) { + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref.domains) + xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); - /* try do disable irq even if forcewake failed */ - gsc_proxy_irq_toggle(gsc, false); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); + /* try do disable irq even if forcewake failed */ + gsc_proxy_irq_toggle(gsc, false); + } xe_gsc_wait_for_worker_completion(gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index cdce210e36f2..313ce83ab0e5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -103,14 +103,13 @@ void xe_gt_sanitize(struct xe_gt *gt) static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) { - unsigned int fw_ref; u32 reg; if (!XE_GT_WA(gt, 16023588340)) return; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; if (xe_gt_is_main_type(gt)) { @@ -120,12 +119,10 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); - xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) { - unsigned int fw_ref; u32 reg; if (!XE_GT_WA(gt, 16023588340)) @@ -134,15 +131,13 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (xe_gt_is_media_type(gt)) return; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } static void gt_reset_worker(struct work_struct *w); @@ -389,7 +384,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) int xe_gt_init_early(struct xe_gt *gt) { - unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -436,13 +430,12 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; xe_gt_mcr_init_early(gt); xe_pat_init(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -460,16 +453,15 @@ static void dump_pat_on_error(struct xe_gt *gt) static int gt_init_with_gt_forcewake(struct xe_gt *gt) { - unsigned int fw_ref; int err; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; err = xe_uc_init(>->uc); if (err) - goto err_force_wake; + return err; xe_gt_topology_init(gt); xe_gt_mcr_init(gt); @@ -478,7 +470,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) - goto err_force_wake; + return err; if (IS_SRIOV_PF(gt_to_xe(gt))) xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } @@ -492,17 +484,17 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) err = xe_hw_engines_init_early(gt); if (err) { dump_pat_on_error(gt); - goto err_force_wake; + return err; } err = xe_hw_engine_class_sysfs_init(gt); if (err) - 
goto err_force_wake; + return err; /* Initialize CCS mode sysfs after early initialization of HW engines */ err = xe_gt_ccs_mode_sysfs_init(gt); if (err) - goto err_force_wake; + return err; /* * Stash hardware-reported version. Since this register does not exist @@ -510,25 +502,16 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) */ gt->info.gmdid = xe_mmio_read32(>->mmio, GMD_ID); - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return err; } static int gt_init_with_all_forcewake(struct xe_gt *gt) { - unsigned int fw_ref; int err; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - err = -ETIMEDOUT; - goto err_force_wake; - } + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) + return -ETIMEDOUT; xe_gt_mcr_set_implicit_defaults(gt); xe_wa_process_gt(gt); @@ -537,20 +520,20 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) err = xe_gt_clock_init(gt); if (err) - goto err_force_wake; + return err; xe_mocs_init(gt); err = xe_execlist_init(gt); if (err) - goto err_force_wake; + return err; err = xe_hw_engines_init(gt); if (err) - goto err_force_wake; + return err; err = xe_uc_init_post_hwconfig(>->uc); if (err) - goto err_force_wake; + return err; if (xe_gt_is_main_type(gt)) { /* @@ -561,10 +544,8 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), IS_DGFX(xe) ? SZ_1M : SZ_512K, 16); - if (IS_ERR(gt->usm.bb_pool)) { - err = PTR_ERR(gt->usm.bb_pool); - goto err_force_wake; - } + if (IS_ERR(gt->usm.bb_pool)) + return PTR_ERR(gt->usm.bb_pool); } } @@ -573,12 +554,12 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) err = xe_migrate_init(tile->migrate); if (err) - goto err_force_wake; + return err; } err = xe_uc_load_hw(>->uc); if (err) - goto err_force_wake; + return err; /* Configure default CCS mode of 1 engine with all resources */ if (xe_gt_ccs_mode_enabled(gt)) { @@ -592,14 +573,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_init_hw(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return 0; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return err; } static void xe_gt_fini(void *arg) @@ -902,56 +876,42 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - unsigned int fw_ref; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_uc_suspend_prepare(>->uc); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } int xe_gt_suspend(struct xe_gt *gt) { - unsigned int fw_ref; int err; xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_msg; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT)); + return -ETIMEDOUT; + } err = xe_uc_suspend(>->uc); - if (err) - goto err_force_wake; + if (err) { + xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); + return err; + } xe_gt_idle_disable_pg(gt); xe_gt_disable_host_l2_vram(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "suspended\n"); return 0; - -err_msg: - 
err = -ETIMEDOUT; -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); - - return err; } void xe_gt_shutdown(struct xe_gt *gt) { - unsigned int fw_ref; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); do_gt_reset(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -976,32 +936,72 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int xe_gt_resume(struct xe_gt *gt) { - unsigned int fw_ref; int err; xe_gt_dbg(gt, "resuming\n"); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_msg; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT)); + return -ETIMEDOUT; + } err = do_gt_restart(gt); if (err) - goto err_force_wake; + return err; xe_gt_idle_enable_pg(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_gt_dbg(gt, "resumed\n"); return 0; +} -err_msg: - err = -ETIMEDOUT; -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); +/** + * xe_gt_runtime_suspend() - GT runtime suspend + * @gt: the GT object + * + * Return: 0 on success, negative error code otherwise. + */ +int xe_gt_runtime_suspend(struct xe_gt *gt) +{ + xe_gt_dbg(gt, "runtime suspending\n"); - return err; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + xe_gt_err(gt, "runtime suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT)); + return -ETIMEDOUT; + } + + xe_uc_runtime_suspend(>->uc); + xe_gt_disable_host_l2_vram(gt); + + xe_gt_dbg(gt, "runtime suspended\n"); + + return 0; +} + +/** + * xe_gt_runtime_resume() - GT runtime resume + * @gt: the GT object + * + * Return: 0 on success, negative error code otherwise. 
+ */ +int xe_gt_runtime_resume(struct xe_gt *gt) +{ + xe_gt_dbg(gt, "runtime resuming\n"); + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + xe_gt_err(gt, "runtime resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT)); + return -ETIMEDOUT; + } + + xe_gt_enable_host_l2_vram(gt); + xe_uc_runtime_resume(>->uc); + + xe_gt_dbg(gt, "runtime resumed\n"); + + return 0; } struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 9d710049da45..94969ddd9d88 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -58,6 +58,8 @@ int xe_gt_suspend(struct xe_gt *gt); void xe_gt_shutdown(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); +int xe_gt_runtime_resume(struct xe_gt *gt); +int xe_gt_runtime_suspend(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); int xe_gt_sanitize_freq(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e4fd632f43cf..e4f38b5150fc 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -105,35 +105,24 @@ int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct xe_gt *gt = node_to_gt(node); struct xe_device *xe = gt_to_xe(gt); - int ret; - xe_pm_runtime_get(xe); - ret = xe_gt_debugfs_simple_show(m, data); - xe_pm_runtime_put(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return xe_gt_debugfs_simple_show(m, data); } static int hw_engines(struct xe_gt *gt, struct drm_printer *p) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - unsigned int fw_ref; - int ret = 0; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - ret = -ETIMEDOUT; - goto fw_put; - } + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) + return -ETIMEDOUT; for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); -fw_put: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return ret; + return 0; } static int steering(struct xe_gt *gt, struct drm_printer *p) @@ -220,6 +209,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc }, { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc }, { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig }, + { "pat_sw_config", .show = xe_gt_debugfs_simple_show, .data = xe_pat_dump_sw_config }, }; /* everything else should be added here */ @@ -269,9 +259,8 @@ static void force_reset(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); xe_gt_reset_async(gt); - xe_pm_runtime_put(xe); } static ssize_t force_reset_write(struct file *file, @@ -297,9 +286,8 @@ static void force_reset_sync(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); xe_gt_reset(gt); - xe_pm_runtime_put(xe); } static ssize_t force_reset_sync_write(struct file *file, diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index ce3c7810469f..a40dd074106f 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -70,9 +70,8 @@ static ssize_t act_freq_show(struct kobject *kobj, struct 
xe_guc_pc *pc = dev_to_pc(dev); u32 freq; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); freq = xe_guc_pc_get_act_freq(pc); - xe_pm_runtime_put(dev_to_xe(dev)); return sysfs_emit(buf, "%d\n", freq); } @@ -86,9 +85,8 @@ static ssize_t cur_freq_show(struct kobject *kobj, u32 freq; ssize_t ret; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); ret = xe_guc_pc_get_cur_freq(pc, &freq); - xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -113,9 +111,8 @@ static ssize_t rpe_freq_show(struct kobject *kobj, struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); freq = xe_guc_pc_get_rpe_freq(pc); - xe_pm_runtime_put(dev_to_xe(dev)); return sysfs_emit(buf, "%d\n", freq); } @@ -128,9 +125,8 @@ static ssize_t rpa_freq_show(struct kobject *kobj, struct xe_guc_pc *pc = dev_to_pc(dev); u32 freq; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); freq = xe_guc_pc_get_rpa_freq(pc); - xe_pm_runtime_put(dev_to_xe(dev)); return sysfs_emit(buf, "%d\n", freq); } @@ -154,9 +150,8 @@ static ssize_t min_freq_show(struct kobject *kobj, u32 freq; ssize_t ret; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); ret = xe_guc_pc_get_min_freq(pc, &freq); - xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -175,9 +170,8 @@ static ssize_t min_freq_store(struct kobject *kobj, if (ret) return ret; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); ret = xe_guc_pc_set_min_freq(pc, freq); - xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -193,9 +187,8 @@ static ssize_t max_freq_show(struct kobject *kobj, u32 freq; ssize_t ret; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); ret = xe_guc_pc_get_max_freq(pc, &freq); - xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -214,9 +207,8 @@ static ssize_t max_freq_store(struct kobject *kobj, if (ret) return ret; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); ret = xe_guc_pc_set_max_freq(pc, freq); - xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -243,9 +235,8 @@ static ssize_t power_profile_store(struct kobject *kobj, struct xe_guc_pc *pc = dev_to_pc(dev); int err; - xe_pm_runtime_get(dev_to_xe(dev)); + guard(xe_pm_runtime)(dev_to_xe(dev)); err = xe_guc_pc_set_power_profile(pc, buff); - xe_pm_runtime_put(dev_to_xe(dev)); return err ?: count; } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 3e3d1d52f630..c1c9bec3c487 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -105,7 +105,6 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) struct xe_gt_idle *gtidle = >->gtidle; struct xe_mmio *mmio = >->mmio; u32 vcs_mask, vecs_mask; - unsigned int fw_ref; int i, j; if (IS_SRIOV_VF(xe)) @@ -137,7 +136,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) } } - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (xe->info.skip_guc_pc) { /* * GuC sets the hysteresis value when GuC PC is enabled @@ -154,13 +153,11 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) VDN_MFXVDENC_POWERGATE_ENABLE(2)); xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable); - xe_force_wake_put(gt_to_fw(gt), fw_ref); } void xe_gt_idle_disable_pg(struct xe_gt *gt) { struct xe_gt_idle *gtidle = >->gtidle; - unsigned int fw_ref; if (IS_SRIOV_VF(gt_to_xe(gt))) return; @@ -168,9 +165,8 @@ 
void xe_gt_idle_disable_pg(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); gtidle->powergate_enable = 0; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); xe_mmio_write32(>->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); - xe_force_wake_put(gt_to_fw(gt), fw_ref); } /** @@ -189,7 +185,6 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) enum xe_gt_idle_state state; u32 pg_enabled, pg_status = 0; u32 vcs_mask, vecs_mask; - unsigned int fw_ref; int n; /* * Media Slices @@ -226,14 +221,12 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) /* Do not wake the GT to read powergating status */ if (state != GT_IDLE_C6) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; pg_enabled = xe_mmio_read32(>->mmio, POWERGATE_ENABLE); pg_status = xe_mmio_read32(>->mmio, POWERGATE_DOMAIN_STATUS); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) { @@ -271,13 +264,9 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct xe_gt_idle *gtidle = dev_to_gtidle(dev); struct xe_guc_pc *pc = gtidle_to_pc(gtidle); - ssize_t ret; - xe_pm_runtime_get(pc_to_xe(pc)); - ret = sysfs_emit(buff, "%s\n", gtidle->name); - xe_pm_runtime_put(pc_to_xe(pc)); - - return ret; + guard(xe_pm_runtime)(pc_to_xe(pc)); + return sysfs_emit(buff, "%s\n", gtidle->name); } static struct kobj_attribute name_attr = __ATTR_RO(name); @@ -289,9 +278,8 @@ static ssize_t idle_status_show(struct kobject *kobj, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; - xe_pm_runtime_get(pc_to_xe(pc)); - state = gtidle->idle_status(pc); - xe_pm_runtime_put(pc_to_xe(pc)); + scoped_guard(xe_pm_runtime, pc_to_xe(pc)) + state = gtidle->idle_status(pc); return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } @@ -319,9 +307,8 @@ static ssize_t idle_residency_ms_show(struct kobject *kobj, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; - xe_pm_runtime_get(pc_to_xe(pc)); - residency = xe_gt_idle_residency_msec(gtidle); - xe_pm_runtime_put(pc_to_xe(pc)); + scoped_guard(xe_pm_runtime, pc_to_xe(pc)) + residency = xe_gt_idle_residency_msec(gtidle); return sysfs_emit(buff, "%llu\n", residency); } @@ -404,21 +391,17 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) int xe_gt_idle_disable_c6(struct xe_gt *gt) { - unsigned int fw_ref; - xe_device_assert_mem_access(gt_to_xe(gt)); if (IS_SRIOV_VF(gt_to_xe(gt))) return 0; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; xe_mmio_write32(>->mmio, RC_CONTROL, 0); xe_mmio_write32(>->mmio, RC_STATE, 0); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 59c5c6b4d994..6e8507c24986 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -269,7 +269,8 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, } /* Return: number of configuration dwords written */ -static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_config *config, + bool details) { u32 n = 0; @@ 
-303,9 +304,11 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); cfg[n++] = config->preempt_timeout; -#define encode_threshold_config(TAG, ...) ({ \ - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \ - cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \ +#define encode_threshold_config(TAG, NAME, VER...) ({ \ + if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, VER), true, VER)) { \ + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \ + cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \ + } \ }); MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config); @@ -328,7 +331,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) return -ENOBUFS; cfg = xe_guc_buf_cpu_ptr(buf); - num_dwords = encode_config(cfg, config, true); + num_dwords = encode_config(gt, cfg, config, true); xe_gt_assert(gt, num_dwords <= max_cfg_dwords); if (xe_gt_is_media_type(gt)) { @@ -2518,7 +2521,7 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu ret = -ENOBUFS; } else { config = pf_pick_vf_config(gt, vfid); - ret = encode_config(buf, config, false) * sizeof(u32); + ret = encode_config(gt, buf, config, false) * sizeof(u32); } } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); @@ -2551,11 +2554,13 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid, return pf_provision_preempt_timeout(gt, vfid, value[0]); /* auto-generate case statements */ -#define define_threshold_key_to_provision_case(TAG, ...) \ +#define define_threshold_key_to_provision_case(TAG, NAME, VER...) \ case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \ BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u); \ if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG)) \ return -EBADMSG; \ + if (IF_ARGS(!GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, VER), false, VER)) \ + return -EKEYREJECTED; \ return pf_provision_threshold(gt, vfid, \ MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), \ value[0]); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 5278ea4fd655..ece9eed5d7c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -21,6 +21,7 @@ #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" +#include "xe_guc.h" #include "xe_pm.h" #include "xe_sriov_pf.h" #include "xe_sriov_pf_provision.h" @@ -123,11 +124,10 @@ static int POLICY##_set(void *data, u64 val) \ if (val > (TYPE)~0ull) \ return -EOVERFLOW; \ \ - xe_pm_runtime_get(xe); \ + guard(xe_pm_runtime)(xe); \ err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ if (!err) \ xe_sriov_pf_provision_set_custom_mode(xe); \ - xe_pm_runtime_put(xe); \ \ return err; \ } \ @@ -189,12 +189,11 @@ static int CONFIG##_set(void *data, u64 val) \ if (val > (TYPE)~0ull) \ return -EOVERFLOW; \ \ - xe_pm_runtime_get(xe); \ + guard(xe_pm_runtime)(xe); \ err = xe_sriov_pf_wait_ready(xe) ?: \ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ if (!err) \ xe_sriov_pf_provision_set_custom_mode(xe); \ - xe_pm_runtime_put(xe); \ \ return err; \ } \ @@ -249,11 +248,10 @@ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index in if (val > (u32)~0ull) return -EOVERFLOW; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val); if (!err) xe_sriov_pf_provision_set_custom_mode(xe); - xe_pm_runtime_put(xe); return 
err; } @@ -304,9 +302,11 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &sched_priority_fops); /* register all threshold attributes */ -#define register_threshold_attribute(TAG, NAME, ...) \ - debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \ - &NAME##_fops); +#define register_threshold_attribute(TAG, NAME, VER...) ({ \ + if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, VER), true, VER)) \ + debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \ + &NAME##_fops); \ +}); MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute) #undef register_threshold_attribute } @@ -358,9 +358,8 @@ static ssize_t control_write(struct file *file, const char __user *buf, size_t c xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd)); if (sysfs_streq(cmd, control_cmds[n].cmd)) { - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0; - xe_pm_runtime_put(xe); break; } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 3174a8dee779..7410e7b93256 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -1026,7 +1026,7 @@ static void action_ring_cleanup(void *arg) static void pf_gt_migration_check_support(struct xe_gt *gt) { - if (GUC_FIRMWARE_VER(>->uc.guc) < MAKE_GUC_VER(70, 54, 0)) + if (!GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 54)) xe_sriov_pf_migration_disable(gt_to_xe(gt), "requires GuC version >= 70.54.0"); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 033eae2d03d3..b8b391cfc8eb 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -41,6 +42,37 @@ #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) +#ifdef CONFIG_DRM_XE_DEBUG +enum VF_MIGRATION_WAIT_POINTS { + VF_MIGRATION_WAIT_RESFIX_START = BIT(0), + VF_MIGRATION_WAIT_FIXUPS = BIT(1), + VF_MIGRATION_WAIT_RESTART_JOBS = BIT(2), + VF_MIGRATION_WAIT_RESFIX_DONE = BIT(3), +}; + +#define VF_MIGRATION_WAIT_DELAY_IN_MS 1000 +static void vf_post_migration_inject_wait(struct xe_gt *gt, + enum VF_MIGRATION_WAIT_POINTS wait) +{ + while (gt->sriov.vf.migration.debug.resfix_stoppers & wait) { + xe_gt_dbg(gt, + "*TESTING* injecting %u ms delay due to resfix_stoppers=%#x, to continue clear %#x\n", + VF_MIGRATION_WAIT_DELAY_IN_MS, + gt->sriov.vf.migration.debug.resfix_stoppers, wait); + + msleep(VF_MIGRATION_WAIT_DELAY_IN_MS); + } +} + +#define VF_MIGRATION_INJECT_WAIT(gt, _POS) ({ \ + struct xe_gt *__gt = (gt); \ + vf_post_migration_inject_wait(__gt, VF_MIGRATION_WAIT_##_POS); \ + }) + +#else +#define VF_MIGRATION_INJECT_WAIT(_gt, ...) 
typecheck(struct xe_gt *, (_gt)) +#endif + static int guc_action_vf_reset(struct xe_guc *guc) { u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { @@ -299,12 +331,13 @@ void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, *found = gt->sriov.vf.guc_version; } -static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) +static int guc_action_vf_resfix_start(struct xe_guc *guc, u16 marker) { u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | - FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_RESFIX_START) | + FIELD_PREP(VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER, marker), }; int ret; @@ -313,28 +346,43 @@ static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) return ret > 0 ? -EPROTO : ret; } -/** - * vf_notify_resfix_done - Notify GuC about resource fixups apply completed. - * @gt: the &xe_gt struct instance linked to target GuC - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. - */ -static int vf_notify_resfix_done(struct xe_gt *gt) +static int vf_resfix_start(struct xe_gt *gt, u16 marker) { struct xe_guc *guc = >->uc.guc; - int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - err = guc_action_vf_notify_resfix_done(guc); - if (unlikely(err)) - xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", - ERR_PTR(err)); - else - xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); + VF_MIGRATION_INJECT_WAIT(gt, RESFIX_START); - return err; + xe_gt_sriov_dbg_verbose(gt, "Sending resfix start marker %u\n", marker); + + return guc_action_vf_resfix_start(guc, marker); +} + +static int guc_action_vf_resfix_done(struct xe_guc *guc, u16 marker) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_RESFIX_DONE) | + FIELD_PREP(VF2GUC_RESFIX_DONE_REQUEST_MSG_0_MARKER, marker), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int vf_resfix_done(struct xe_gt *gt, u16 marker) +{ + struct xe_guc *guc = >->uc.guc; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + xe_gt_sriov_dbg_verbose(gt, "Sending resfix done marker %u\n", marker); + + return guc_action_vf_resfix_done(guc, marker); } static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, @@ -1123,12 +1171,8 @@ static bool vf_post_migration_shutdown(struct xe_gt *gt) return true; } - spin_lock_irq(>->sriov.vf.migration.lock); - gt->sriov.vf.migration.recovery_queued = false; - spin_unlock_irq(>->sriov.vf.migration.lock); - xe_guc_ct_flush_and_stop(>->uc.guc.ct); - xe_guc_submit_pause(>->uc.guc); + xe_guc_submit_pause_vf(>->uc.guc); xe_tlb_inval_reset(>->tlb_inval); return false; @@ -1144,6 +1188,8 @@ static int vf_post_migration_fixups(struct xe_gt *gt) void *buf = gt->sriov.vf.migration.scratch; int err; + VF_MIGRATION_INJECT_WAIT(gt, FIXUPS); + /* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */ err = xe_gt_sriov_vf_query_config(gt); if (err) @@ -1162,13 +1208,22 @@ static int vf_post_migration_fixups(struct xe_gt *gt) static void vf_post_migration_rearm(struct xe_gt *gt) { + VF_MIGRATION_INJECT_WAIT(gt, RESTART_JOBS); + + /* + * Make sure interrupts on the new HW are properly set. 
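RESFIX_START and RESFIX_DONE are paired by their marker: the recovery path just below sends the same non-zero value to vf_resfix_start() and vf_resfix_done(), derived from a small wrapping counter with a +1 so that zero is never produced. A standalone toy model of that derivation (the struct and field names below are invented; only the arithmetic mirrors the driver code):

#include <stdint.h>
#include <stdio.h>

/* Toy model of the resfix marker pairing: START and DONE carry the same
 * non-zero marker, taken from a small wrapping counter. */
struct toy_vf {
	uint8_t resfix_counter;	/* wraps 0..255 */
};

static uint16_t next_resfix_marker(struct toy_vf *vf)
{
	/* +1 keeps the marker in 1..256, so zero is never sent */
	return 1 + vf->resfix_counter++;
}

int main(void)
{
	struct toy_vf vf = { .resfix_counter = 254 };

	for (int i = 0; i < 4; i++) {
		uint16_t marker = next_resfix_marker(&vf);

		/* the same value would go into RESFIX_START and RESFIX_DONE */
		printf("recovery %d: marker %u\n", i, marker);
	}
	return 0;
}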
The GuC IRQ + * must be working at this point, since the recovery has already started, + * but the rest was not enabled using the procedure from the spec. + */ + xe_irq_resume(gt_to_xe(gt)); + xe_guc_ct_restart(&gt->uc.guc.ct); - xe_guc_submit_unpause_prepare(&gt->uc.guc); + xe_guc_submit_unpause_prepare_vf(&gt->uc.guc); } static void vf_post_migration_kickstart(struct xe_gt *gt) { - xe_guc_submit_unpause(&gt->uc.guc); + xe_guc_submit_unpause_vf(&gt->uc.guc); } static void vf_post_migration_abort(struct xe_gt *gt) @@ -1183,37 +1238,49 @@ static void vf_post_migration_abort(struct xe_gt *gt) xe_guc_submit_pause_abort(&gt->uc.guc); } -static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) +static int vf_post_migration_resfix_done(struct xe_gt *gt, u16 marker) { - bool skip_resfix = false; + VF_MIGRATION_INJECT_WAIT(gt, RESFIX_DONE); spin_lock_irq(&gt->sriov.vf.migration.lock); - if (gt->sriov.vf.migration.recovery_queued) { - skip_resfix = true; - xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n"); - } else { + if (gt->sriov.vf.migration.recovery_queued) + xe_gt_sriov_dbg(gt, "another recovery imminent\n"); + else WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); - } spin_unlock_irq(&gt->sriov.vf.migration.lock); - if (skip_resfix) - return -EAGAIN; + return vf_resfix_done(gt, marker); +} - /* - * Make sure interrupts on the new HW are properly set. The GuC IRQ - * must be working at this point, since the recovery did started, - * but the rest was not enabled using the procedure from spec. - */ - xe_irq_resume(gt_to_xe(gt)); +static int vf_post_migration_resfix_start(struct xe_gt *gt, u16 marker) +{ + int err; - return vf_notify_resfix_done(gt); + err = vf_resfix_start(gt, marker); + + guard(spinlock_irq) (&gt->sriov.vf.migration.lock); + gt->sriov.vf.migration.recovery_queued = false; + + return err; +} + +static u16 vf_post_migration_next_resfix_marker(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + BUILD_BUG_ON(1 + ((typeof(gt->sriov.vf.migration.resfix_marker))~0) > + FIELD_MAX(VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER)); + + /* add 1 to avoid zero-marker */ + return 1 + gt->sriov.vf.migration.resfix_marker++; } static void vf_post_migration_recovery(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int err; + u16 marker; bool retry; + int err; xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); @@ -1227,15 +1294,30 @@ static void vf_post_migration_recovery(struct xe_gt *gt) goto fail; } + marker = vf_post_migration_next_resfix_marker(gt); + + err = vf_post_migration_resfix_start(gt, marker); + if (unlikely(err)) { + xe_gt_sriov_err(gt, "Recovery failed at GuC RESFIX_START step (%pe)\n", + ERR_PTR(err)); + goto fail; + } + err = vf_post_migration_fixups(gt); if (err) goto fail; vf_post_migration_rearm(gt); - err = vf_post_migration_notify_resfix_done(gt); - if (err && err != -EAGAIN) + err = vf_post_migration_resfix_done(gt, marker); + if (err) { + if (err == -EREMCHG) + goto queue; + + xe_gt_sriov_err(gt, "Recovery failed at GuC RESFIX_DONE step (%pe)\n", + ERR_PTR(err)); goto fail; + } vf_post_migration_kickstart(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c index 2ed5b6780d30..507718326e1f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c @@ -69,4 +69,16 @@ void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root) vfdentry->d_inode->i_private = gt; drm_debugfs_create_files(vf_info, ARRAY_SIZE(vf_info),
vfdentry, minor); + + /* + * /sys/kernel/debug/dri/BDF/ + * ├── tile0 + * ├── gt0 + * ├── vf + * ├── resfix_stoppers + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + debugfs_create_x8("resfix_stoppers", 0600, vfdentry, + &gt->sriov.vf.migration.debug.resfix_stoppers); + } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 420b0e6089de..510c33116fbd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -52,6 +52,19 @@ struct xe_gt_sriov_vf_migration { wait_queue_head_t wq; /** @scratch: Scratch memory for VF recovery */ void *scratch; + /** @debug: Debug hooks for delaying migration */ + struct { + /** + * @debug.resfix_stoppers: Stop and wait at different stages + * during post migration recovery + */ + u8 resfix_stoppers; + } debug; + /** + * @resfix_marker: Marker sent at the start and at the end of the + * post-migration steps. + */ + u8 resfix_marker; /** @recovery_teardown: VF post migration recovery is being torn down */ bool recovery_teardown; /** @recovery_queued: VF post migration recovery is queued */ diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 5f74706bab81..fb2904bd0abd 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -66,6 +66,16 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"), DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"), DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"), + DEF_STAT_STR(HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, + "hw_engine_group_suspend_lr_queue_count"), + DEF_STAT_STR(HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT, + "hw_engine_group_skip_lr_queue_count"), + DEF_STAT_STR(HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, + "hw_engine_group_wait_dma_queue_count"), + DEF_STAT_STR(HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US, + "hw_engine_group_suspend_lr_queue_us"), + DEF_STAT_STR(HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US, + "hw_engine_group_wait_dma_queue_us"), }; /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index e8aea32bc971..59a7bf60e242 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_STATS_H_ #define _XE_GT_STATS_H_ +#include <linux/ktime.h> + #include "xe_gt_stats_types.h" struct xe_gt; @@ -23,4 +25,34 @@ xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, } #endif + +/** + * xe_gt_stats_ktime_us_delta() - Get delta in microseconds between now and a + * start time + * @start: Start time + * + * Helper for GT stats to get delta in microseconds between now and a start + * time, compiles out if GT stats are disabled. + * + * Return: Delta in microseconds between now and a start time + */ +static inline s64 xe_gt_stats_ktime_us_delta(ktime_t start) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? + ktime_us_delta(ktime_get(), start) : 0; +} + +/** + * xe_gt_stats_ktime_get() - Get current ktime + * + * Helper for GT stats to get current ktime, compiles out if GT stats are + * disabled. + * + * Return: Current ktime, or 0 if GT stats are compiled out + */ +static inline ktime_t xe_gt_stats_ktime_get(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ?
ktime_get() : 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index d8348a8de2e1..b92d013091d5 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -44,6 +44,11 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_4K_BIND_US, XE_GT_STATS_ID_SVM_64K_BIND_US, XE_GT_STATS_ID_SVM_2M_BIND_US, + XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, + XE_GT_STATS_ID_HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT, + XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, + XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US, + XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 01477fc7b37b..570358310e97 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -85,7 +85,7 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct xe_reg reg; - u32 val, mask; + u32 mask; if (xe_gt_is_media_type(gt)) reg = MTL_MEDIA_PERF_LIMIT_REASONS; @@ -97,11 +97,8 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) else mask = GT0_PERF_LIMIT_REASONS_MASK; - xe_pm_runtime_get(xe); - val = xe_mmio_read32(>->mmio, reg) & mask; - xe_pm_runtime_put(xe); - - return val; + guard(xe_pm_runtime)(xe); + return xe_mmio_read32(>->mmio, reg) & mask; } static bool is_throttled_by(struct xe_gt *gt, u32 mask) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 0a728180b6fe..5318d92fd473 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -140,6 +140,11 @@ struct xe_gt { u64 engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; + /** + * @multi_queue_engine_class_mask: Bitmask of engine classes with + * multi queue support enabled. 
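The new hw_engine_group_* entries come in COUNT/US pairs: one counter counts events, its sibling accumulates microseconds measured with the ktime helpers above, and everything collapses to a no-op when the stats are configured out. A standalone toy model of that pattern; the names and the CLOCK_MONOTONIC-based timer are illustrative, not the driver's ktime code:

#include <stdio.h>
#include <time.h>

/* Toy model of the COUNT/US stat pairing: one entry counts events, its
 * sibling accumulates the time spent, and both collapse to no-ops when
 * the stats are configured out (mirroring IS_ENABLED(CONFIG_DEBUG_FS)). */
#define TOY_STATS_ENABLED 1

enum toy_stat_id {
	TOY_STAT_WAIT_DMA_QUEUE_COUNT,
	TOY_STAT_WAIT_DMA_QUEUE_US,
	TOY_STAT_NUM,
};

static long long toy_stats[TOY_STAT_NUM];

static void toy_stats_incr(enum toy_stat_id id, long long incr)
{
	if (TOY_STATS_ENABLED)
		toy_stats[id] += incr;
}

static long long toy_now_us(void)
{
	struct timespec ts;

	if (!TOY_STATS_ENABLED)
		return 0;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
}

int main(void)
{
	long long start = toy_now_us();

	/* ...the caller would block waiting for the DMA queue here... */

	toy_stats_incr(TOY_STAT_WAIT_DMA_QUEUE_COUNT, 1);
	toy_stats_incr(TOY_STAT_WAIT_DMA_QUEUE_US, toy_now_us() - start);

	printf("waits=%lld total_us=%lld\n",
	       toy_stats[TOY_STAT_WAIT_DMA_QUEUE_COUNT],
	       toy_stats[TOY_STAT_WAIT_DMA_QUEUE_US]);
	return 0;
}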
+ */ + u16 multi_queue_engine_class_mask; /** @info.id: Unique ID of this GT within the PCI Device */ u8 id; /** @info.has_indirect_ring_state: GT has indirect ring state support */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index a686b04879d6..09ac092c3687 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -104,7 +104,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; u32 flags; - #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) + #if (((XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE) % SZ_1M) == 0) #define LOG_UNIT SZ_1M #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS #else @@ -112,7 +112,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) #define LOG_FLAG 0 #endif - #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #if (((XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) #define CAPTURE_UNIT SZ_1M #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS #else @@ -120,20 +120,21 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) #define CAPTURE_FLAG 0 #endif - BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); + BUILD_BUG_ON(!XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | CAPTURE_FLAG | LOG_FLAG | - FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) | - FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) | - FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) | + FIELD_PREP(GUC_LOG_CRASH_DUMP, XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_EVENT_DATA, XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_STATE_CAPTURE, XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE / + CAPTURE_UNIT - 1) | FIELD_PREP(GUC_LOG_BUF_ADDR, offset); #undef LOG_UNIT @@ -660,11 +661,9 @@ static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; struct xe_gt *gt = guc_to_gt(guc); - unsigned int fw_ref; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); - xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); guc_g2g_fini(guc); } @@ -768,6 +767,10 @@ int xe_guc_init(struct xe_guc *guc) if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; + /* Disable page reclaim if GuC FW does not support */ + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 14, 0)) + xe->info.has_page_reclaim_hw_assist = false; + if (IS_SRIOV_VF(xe)) { ret = xe_guc_ct_init(&guc->ct); if (ret) @@ -1485,6 +1488,12 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + if (unlikely(error == XE_GUC_RESPONSE_VF_MIGRATED)) { + xe_gt_dbg(gt, "GuC mmio request %#x rejected due to MIGRATION (hint %#x)\n", + request[0], hint); + return -EREMCHG; + } + xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n", 
request[0], error, hint); return -ENXIO; @@ -1618,18 +1627,51 @@ int xe_guc_start(struct xe_guc *guc) return xe_guc_submit_start(guc); } +/** + * xe_guc_runtime_suspend() - GuC runtime suspend + * @guc: The GuC object + * + * Stop further runs of submission tasks on given GuC and runtime suspend + * GuC CT. + */ +void xe_guc_runtime_suspend(struct xe_guc *guc) +{ + xe_guc_submit_pause(guc); + xe_guc_submit_disable(guc); + xe_guc_ct_runtime_suspend(&guc->ct); +} + +/** + * xe_guc_runtime_resume() - GuC runtime resume + * @guc: The GuC object + * + * Runtime resume GuC CT and allow further runs of submission tasks on + * given GuC. + */ +void xe_guc_runtime_resume(struct xe_guc *guc) +{ + /* + * Runtime PM flows are not applicable for VFs, so it's safe to + * directly enable IRQ. + */ + guc_enable_irq(guc); + + xe_guc_ct_runtime_resume(&guc->ct); + xe_guc_submit_enable(guc); + xe_guc_submit_unpause(guc); +} + void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) { struct xe_gt *gt = guc_to_gt(guc); - unsigned int fw_ref; u32 status; int i; xe_uc_fw_print(&guc->fw, p); if (!IS_SRIOV_VF(gt_to_xe(gt))) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; status = xe_mmio_read32(>->mmio, GUC_STATUS); @@ -1649,8 +1691,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) drm_printf(p, "\t%2d: \t0x%x\n", i, xe_mmio_read32(>->mmio, SOFT_SCRATCH(i))); } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } drm_puts(p, "\n"); diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e2d4c5f44ae3..a169f231cbd8 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -18,10 +18,16 @@ */ #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define MAKE_GUC_VER_ARGS(ver...) \ + (BUILD_BUG_ON_ZERO(COUNT_ARGS(ver) < 2 || COUNT_ARGS(ver) > 3) + \ + MAKE_GUC_VER(PICK_ARG1(ver), PICK_ARG2(ver), IF_ARGS(PICK_ARG3(ver), 0, PICK_ARG3(ver)))) + #define GUC_SUBMIT_VER(guc) \ MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) #define GUC_FIRMWARE_VER(guc) \ MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) +#define GUC_FIRMWARE_VER_AT_LEAST(guc, ver...) \ + xe_guc_fw_version_at_least((guc), MAKE_GUC_VER_ARGS(ver)) struct drm_printer; @@ -35,6 +41,8 @@ int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); int xe_guc_opt_in_features_enable(struct xe_guc *guc); +void xe_guc_runtime_suspend(struct xe_guc *guc); +void xe_guc_runtime_resume(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); @@ -94,4 +102,19 @@ static inline struct drm_device *guc_to_drm(struct xe_guc *guc) return &guc_to_xe(guc)->drm; } +/** + * xe_guc_fw_version_at_least() - Check if GuC is at least of given version. + * @guc: the &xe_guc + * @ver: the version to check + * + * The @ver should be prepared using MAKE_GUC_VER(major, minor, patch). + * + * Return: true if loaded GuC firmware is at least of given version, + * false otherwise. 
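GUC_FIRMWARE_VER_AT_LEAST(guc, maj, min[, patch]) reduces to an integer comparison of versions packed by MAKE_GUC_VER(), with an omitted patch level treated as 0. A standalone check of that packing, reusing the macro as it is defined in this header; the sample version numbers are arbitrary:

#include <assert.h>
#include <stdio.h>

/* Same packing as this header's MAKE_GUC_VER(): major/minor/patch in one
 * value, so "at least 70.53" is a plain >= against MAKE_GUC_VER(70, 53, 0). */
#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))

int main(void)
{
	unsigned int loaded = MAKE_GUC_VER(70, 54, 1);	/* arbitrary sample */

	assert(loaded >= MAKE_GUC_VER(70, 53, 0));	/* e.g. multi_lrc_count KLV */
	assert(loaded >= MAKE_GUC_VER(70, 44, 0));	/* e.g. a WA KLV gated on 70.44 */
	assert(!(MAKE_GUC_VER(70, 52, 9) >= MAKE_GUC_VER(70, 53, 0)));

	printf("packed version: 0x%06x\n", loaded);
	return 0;
}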
+ */ +static inline bool xe_guc_fw_version_at_least(const struct xe_guc *guc, u32 ver) +{ + return GUC_FIRMWARE_VER(guc) >= ver; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index bcb85a1bf26d..5feeb91426ee 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -317,7 +317,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads) offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562)) + if (XE_GT_WA(gt, 16021333562)) guc_waklv_enable(ads, NULL, 0, &offset, &remain, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); if (XE_GT_WA(gt, 18024947630)) @@ -347,10 +347,10 @@ static void guc_waklv_init(struct xe_guc_ads *ads) guc_waklv_enable(ads, NULL, 0, &offset, &remain, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET); - if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708)) + if (GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 44) && XE_GT_WA(gt, 16026508708)) guc_waklv_enable(ads, NULL, 0, &offset, &remain, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH); - if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) { + if (GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 47) && XE_GT_WA(gt, 16026007364)) { u32 data[] = { 0x0, 0xF, diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 3ce442500130..c36fc31e0438 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size) struct xe_gt *gt = cache_to_gt(cache); struct xe_sa_manager *sam; - sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32)); + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0); if (IS_ERR(sam)) return PTR_ERR(sam); cache->sam = sam; diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 0c1fbe97b8bf..2cda92f7b323 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -843,7 +843,7 @@ static void check_guc_capture_size(struct xe_guc *guc) { int capture_size = guc_capture_output_size_est(guc); int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; - u32 buffer_size = xe_guc_log_section_size_capture(&guc->log); + u32 buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE; /* * NOTE: capture_size is much smaller than the capture region @@ -949,7 +949,7 @@ guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *no * ADS module also calls separately for PF vs VF. 
* * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param)) - * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small) + * Size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE (warns if on too-small) * Note2: 'x 3' to hold multiple capture groups * * GUC Runtime notify capture: @@ -1367,7 +1367,7 @@ static int __guc_capture_flushlog_complete(struct xe_guc *guc) { u32 action[] = { XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE, - GUC_LOG_BUFFER_CAPTURE + GUC_LOG_TYPE_STATE_CAPTURE }; return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); @@ -1384,8 +1384,8 @@ static void __guc_capture_process_output(struct xe_guc *guc) u32 log_buf_state_offset; u32 src_data_offset; - log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE; - src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE); + log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_TYPE_STATE_CAPTURE; + src_data_offset = XE_GUC_LOG_STATE_CAPTURE_OFFSET; /* * Make a copy of the state structure, inside GuC log buffer @@ -1395,15 +1395,15 @@ static void __guc_capture_process_output(struct xe_guc *guc) xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap, log_buf_state_offset, sizeof(struct guc_log_buffer_state)); - buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE); + buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE; read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags); /* Bookkeeping stuff */ tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags); - guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp; - new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE, + guc->log.stats[GUC_LOG_TYPE_STATE_CAPTURE].flush += tmp; + new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_TYPE_STATE_CAPTURE, full_count); /* Now copy the actual logs. 
*/ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 4ac434ad216f..c3df9b3f1b4d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -42,6 +42,21 @@ static void ct_exit_safe_mode(struct xe_guc_ct *ct); static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state); +static struct xe_guc *ct_to_guc(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_guc, ct); +} + +static struct xe_gt *ct_to_gt(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_gt, uc.guc.ct); +} + +static struct xe_device *ct_to_xe(struct xe_guc_ct *ct) +{ + return gt_to_xe(ct_to_gt(ct)); +} + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -68,14 +83,101 @@ enum { static void ct_dead_worker_func(struct work_struct *w); static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code); -#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code) +static void ct_dead_fini(struct xe_guc_ct *ct) +{ + cancel_work_sync(&ct->dead.worker); +} + +static void ct_dead_init(struct xe_guc_ct *ct) +{ + spin_lock_init(&ct->dead.lock); + INIT_WORK(&ct->dead.worker, ct_dead_worker_func); + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + stack_depot_init(); +#endif +} + +static void fast_req_stack_save(struct xe_guc_ct *ct, unsigned int slot) +{ +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + unsigned long entries[SZ_32]; + unsigned int n; + + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + /* May be called under spinlock, so avoid sleeping */ + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); +#endif +} + +static void fast_req_dump(struct xe_guc_ct *ct, u16 fence, unsigned int slot) +{ + struct xe_gt *gt = ct_to_gt(ct); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + char *buf __cleanup(kfree) = kmalloc(SZ_4K, GFP_NOWAIT); + + if (buf && stack_depot_snprint(ct->fast_req[slot].stack, buf, SZ_4K, 0)) + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s\n", + fence, ct->fast_req[slot].action, buf); + else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", + fence, ct->fast_req[slot].action); #else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", + fence, ct->fast_req[slot].action); +#endif +} + +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ + u16 fence_min = U16_MAX, fence_max = 0; + struct xe_gt *gt = ct_to_gt(ct); + unsigned int n; + + lockdep_assert_held(&ct->lock); + + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { + if (ct->fast_req[n].fence < fence_min) + fence_min = ct->fast_req[n].fence; + if (ct->fast_req[n].fence > fence_max) + fence_max = ct->fast_req[n].fence; + + if (ct->fast_req[n].fence != fence) + continue; + + return fast_req_dump(ct, fence, n); + } + + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? 
[range = 0x%x -> 0x%x, next = 0x%X]\n", + fence, fence_min, fence_max, ct->fence_seqno); +} + +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); + + fast_req_stack_save(ct, slot); + ct->fast_req[slot].fence = fence; + ct->fast_req[slot].action = action; +} + +#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code) + +#else + +static void ct_dead_fini(struct xe_guc_ct *ct) { } +static void ct_dead_init(struct xe_guc_ct *ct) { } + +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) { } +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) { } + #define CT_DEAD(ct, ctb, reason) \ do { \ struct guc_ctb *_ctb = (ctb); \ if (_ctb) \ _ctb->info.broken = true; \ } while (0) + #endif /* Used when a CT send wants to block and / or receive data */ @@ -112,24 +214,6 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) return g2h_fence->seqno == ~0x0; } -static struct xe_guc * -ct_to_guc(struct xe_guc_ct *ct) -{ - return container_of(ct, struct xe_guc, ct); -} - -static struct xe_gt * -ct_to_gt(struct xe_guc_ct *ct) -{ - return container_of(ct, struct xe_gt, uc.guc.ct); -} - -static struct xe_device * -ct_to_xe(struct xe_guc_ct *ct) -{ - return gt_to_xe(ct_to_gt(ct)); -} - /** * DOC: GuC CTB Blob * @@ -169,8 +253,11 @@ ct_to_xe(struct xe_guc_ct *ct) #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2) #define CTB_H2G_BUFFER_SIZE (SZ_4K) +#define CTB_H2G_BUFFER_DWORDS (CTB_H2G_BUFFER_SIZE / sizeof(u32)) #define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define CTB_G2H_BUFFER_DWORDS (CTB_G2H_BUFFER_SIZE / sizeof(u32)) #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) +#define G2H_ROOM_BUFFER_DWORDS (CTB_G2H_BUFFER_DWORDS / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full @@ -199,9 +286,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) - cancel_work_sync(&ct->dead.worker); -#endif + ct_dead_fini(ct); ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); @@ -239,13 +324,8 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) - spin_lock_init(&ct->dead.lock); - INIT_WORK(&ct->dead.worker, ct_dead_worker_func); -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) - stack_depot_init(); -#endif -#endif + + ct_dead_init(ct); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); @@ -326,7 +406,7 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, struct iosys_map *map) { - h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32); + h2g->info.size = CTB_H2G_BUFFER_DWORDS; h2g->info.resv_space = 0; h2g->info.tail = 0; h2g->info.head = 0; @@ -344,8 +424,8 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, struct iosys_map *map) { - g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32); - g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); + g2h->info.size = CTB_G2H_BUFFER_DWORDS; + g2h->info.resv_space = G2H_ROOM_BUFFER_DWORDS; g2h->info.head = 0; g2h->info.tail = 0; g2h->info.space = 
CIRC_SPACE(g2h->info.tail, g2h->info.head, @@ -640,6 +720,39 @@ void xe_guc_ct_stop(struct xe_guc_ct *ct) stop_g2h_handler(ct); } +/** + * xe_guc_ct_runtime_suspend() - GuC CT runtime suspend + * @ct: the &xe_guc_ct + * + * Set GuC CT to disabled state. + */ +void xe_guc_ct_runtime_suspend(struct xe_guc_ct *ct) +{ + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 credits = CIRC_SPACE(0, 0, CTB_G2H_BUFFER_DWORDS) - G2H_ROOM_BUFFER_DWORDS; + + /* We should be back to guc_ct_ctb_g2h_init() values */ + xe_gt_assert(ct_to_gt(ct), g2h->info.space == credits); + + /* + * Since we're already in runtime suspend path, we shouldn't have pending + * messages. But if there happen to be any, we'd probably want them to be + * thrown as errors for further investigation. + */ + xe_guc_ct_disable(ct); +} + +/** + * xe_guc_ct_runtime_resume() - GuC CT runtime resume + * @ct: the &xe_guc_ct + * + * Restart GuC CT and set it to enabled state. + */ +void xe_guc_ct_runtime_resume(struct xe_guc_ct *ct) +{ + xe_guc_ct_restart(ct); +} + static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) { struct guc_ctb *h2g = &ct->ctbs.h2g; @@ -747,28 +860,6 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) -{ - unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) - unsigned long entries[SZ_32]; - unsigned int n; - - n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - - /* May be called under spinlock, so avoid sleeping */ - ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); -#endif - ct->fast_req[slot].fence = fence; - ct->fast_req[slot].action = action; -} -#else -static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) -{ -} -#endif - /* * The CT protocol accepts a 16 bits fence. This field is fully owned by the * driver, the GuC will just copy it to the reply message. 
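The assert in xe_guc_ct_runtime_suspend() checks that g2h->info.space has returned to the value guc_ct_ctb_g2h_init() starts from. That number can be recomputed from the buffer sizes defined earlier in this file; the snippet below does so standalone, with a local copy of the CIRC_SPACE() arithmetic from linux/circ_buf.h:

#include <assert.h>
#include <stdio.h>

#define SZ_128K 0x20000u

/* Same power-of-two ring arithmetic as linux/circ_buf.h's CIRC_SPACE() */
#define CIRC_SPACE(head, tail, size) (((tail) - ((head) + 1)) & ((size) - 1))

#define CTB_G2H_BUFFER_SIZE    (SZ_128K)
#define CTB_G2H_BUFFER_DWORDS  (CTB_G2H_BUFFER_SIZE / sizeof(unsigned int))
#define G2H_ROOM_BUFFER_DWORDS (CTB_G2H_BUFFER_DWORDS / 2)

int main(void)
{
	/* space right after guc_ct_ctb_g2h_init(): head == tail == 0 */
	size_t space = CIRC_SPACE(0, 0, CTB_G2H_BUFFER_DWORDS) - G2H_ROOM_BUFFER_DWORDS;

	/* 32768 dwords total, one slot lost to the ring, half reserved */
	assert(space == 32768 - 1 - 16384);
	printf("idle G2H credits: %zu dwords\n", space);
	return 0;
}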
Since we need to @@ -1310,10 +1401,12 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) lockdep_assert_held(&ct->lock); switch (action) { + case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE: case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: g2h_release_space(ct, len); } @@ -1338,55 +1431,6 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) return 0; } -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -static void fast_req_report(struct xe_guc_ct *ct, u16 fence) -{ - u16 fence_min = U16_MAX, fence_max = 0; - struct xe_gt *gt = ct_to_gt(ct); - bool found = false; - unsigned int n; -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) - char *buf; -#endif - - lockdep_assert_held(&ct->lock); - - for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { - if (ct->fast_req[n].fence < fence_min) - fence_min = ct->fast_req[n].fence; - if (ct->fast_req[n].fence > fence_max) - fence_max = ct->fast_req[n].fence; - - if (ct->fast_req[n].fence != fence) - continue; - found = true; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) - buf = kmalloc(SZ_4K, GFP_NOWAIT); - if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) - xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", - fence, ct->fast_req[n].action, buf); - else - xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", - fence, ct->fast_req[n].action); - kfree(buf); -#else - xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", - fence, ct->fast_req[n].action); -#endif - break; - } - - if (!found) - xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n", - fence, fence_min, fence_max, ct->fence_seqno); -} -#else -static void fast_req_report(struct xe_guc_ct *ct, u16 fence) -{ -} -#endif - static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1549,6 +1593,15 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: + /* + * Page reclamation is an extension of TLB invalidation. Both + * operations share the same seqno and fence. When either + * action completes, we need to signal the corresponding + * fence. Since the handling logic (lookup fence by seqno, + * fence signalling) is identical, we use the same handler + * for both G2H events. 
+ */ ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: @@ -1572,6 +1625,13 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_g2g_test_notification(guc, payload, adj_len); break; #endif + case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE: + ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR: + ret = xe_guc_exec_queue_cgp_context_error_handler(guc, payload, + adj_len); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } @@ -1714,6 +1774,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) switch (action) { case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: break; /* Process these in fast-path */ default: return 0; @@ -1750,6 +1811,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: + /* + * Seqno and fence handling of page reclamation and TLB + * invalidation is identical, so we can use the same handler + * for both actions. + */ __g2h_release_space(ct, len); ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index ca1ce2b3c354..5599939f8fe1 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -17,6 +17,8 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); int xe_guc_ct_restart(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); +void xe_guc_ct_runtime_resume(struct xe_guc_ct *ct); +void xe_guc_ct_runtime_suspend(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 0b102ab46c4d..23827e87450f 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -70,13 +70,9 @@ static int guc_debugfs_show(struct seq_file *m, void *data) struct xe_gt *gt = grandparent->d_inode->i_private; struct xe_device *xe = gt_to_xe(gt); int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data; - int ret; - xe_pm_runtime_get(xe); - ret = print(>->uc.guc, &p); - xe_pm_runtime_put(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return print(>->uc.guc, &p); } static int guc_log(struct xe_guc *guc, struct drm_printer *p) @@ -85,6 +81,12 @@ static int guc_log(struct xe_guc *guc, struct drm_printer *p) return 0; } +static int guc_log_lfd(struct xe_guc *guc, struct drm_printer *p) +{ + xe_guc_log_print_lfd(&guc->log, p); + return 0; +} + static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p) { xe_guc_log_print_dmesg(&guc->log); @@ -121,6 +123,7 @@ static const struct drm_info_list slpc_debugfs_list[] = { /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { { "guc_log", .show = guc_debugfs_show, .data = guc_log }, + { "guc_log_lfd", .show = guc_debugfs_show, .data = guc_log_lfd }, { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, }; diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 
c90dd266e9cf..a04faec477ae 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -16,6 +16,8 @@ #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 #define G2H_LEN_DW_G2G_NOTIFY_MIN 3 +#define G2H_LEN_DW_MULTI_QUEUE_CONTEXT 3 +#define G2H_LEN_DW_PAGE_RECLAMATION 3 #define GUC_ID_MAX 65535 #define GUC_ID_UNKNOWN 0xffffffff @@ -62,6 +64,8 @@ struct guc_ctxt_registration_info { u32 wq_base_lo; u32 wq_base_hi; u32 wq_size; + u32 cgp_lo; + u32 cgp_hi; u32 hwlrca_lo; u32 hwlrca_hi; }; @@ -91,9 +95,9 @@ struct guc_update_exec_queue_policy { #define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) #define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) #define GUC_LOG_LOG_ALLOC_UNITS BIT(3) -#define GUC_LOG_CRASH REG_GENMASK(5, 4) -#define GUC_LOG_DEBUG REG_GENMASK(9, 6) -#define GUC_LOG_CAPTURE REG_GENMASK(11, 10) +#define GUC_LOG_CRASH_DUMP REG_GENMASK(5, 4) +#define GUC_LOG_EVENT_DATA REG_GENMASK(9, 6) +#define GUC_LOG_STATE_CAPTURE REG_GENMASK(11, 10) #define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12) #define GUC_CTL_WA 1 diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h index 0a028c94756d..45ab5a3b5218 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h @@ -24,6 +24,11 @@ * ABI and the associated &NAME, that may be used in code or debugfs/sysfs:: * * define(TAG, NAME) + * + * If required, KLVs can be labeled with GuC firmware version that added them:: + * + * define(TAG, NAME, MAJOR, MINOR) + * define(TAG, NAME, MAJOR, MINOR, PATCH) */ #define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \ define(CAT_ERR, cat_error_count) \ @@ -32,6 +37,7 @@ define(H2G_STORM, guc_time_us) \ define(IRQ_STORM, irq_time_us) \ define(DOORBELL_STORM, doorbell_time_us) \ + define(MULTI_LRC_COUNT, multi_lrc_count, 70, 53)\ /* end */ /** diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index c01ccb35dc75..d7473b9673bb 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -7,8 +7,10 @@ #include +#include #include +#include "abi/guc_lfd_abi.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_devcoredump.h" @@ -19,6 +21,77 @@ #include "xe_mmio.h" #include "xe_module.h" +#define GUC_LOG_CHUNK_SIZE SZ_2M + +/* Magic keys define */ +#define GUC_LFD_DRIVER_KEY_STREAMING 0x8086AAAA474C5346 +#define GUC_LFD_LOG_BUFFER_MARKER_2 0xDEADFEED +#define GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2 0x8086DEAD +#define GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2 0xBEEFFEED +#define GUC_LFD_LOG_BUFFER_MARKER_1V2 0xCABBA9E6 +#define GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2 0xCABBA9F7 +#define GUC_LFD_DATA_HEADER_MAGIC 0x8086 + +/* LFD supported LIC type range */ +#define GUC_LIC_TYPE_FIRST GUC_LIC_TYPE_GUC_SW_VERSION +#define GUC_LIC_TYPE_LAST GUC_LIC_TYPE_BUILD_PLATFORM_ID +#define GUC_LFD_TYPE_FW_RANGE_FIRST GUC_LFD_TYPE_FW_VERSION +#define GUC_LFD_TYPE_FW_RANGE_LAST GUC_LFD_TYPE_BUILD_PLATFORM_ID + +#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH 4096 +#define GUC_LOG_BUFFER_INIT_CONFIG 3 + +struct guc_log_buffer_entry_list { + u32 offset; + u32 rd_ptr; + u32 wr_ptr; + u32 wrap_offset; + u32 buf_size; +}; + +struct guc_lic_save { + u32 version; + /* + * Array of init config KLV values. 
+ * Range from GUC_LOG_LIC_TYPE_FIRST to GUC_LOG_LIC_TYPE_LAST + */ + u32 values[GUC_LIC_TYPE_LAST - GUC_LIC_TYPE_FIRST + 1]; + struct guc_log_buffer_entry_list entry[GUC_LOG_BUFFER_INIT_CONFIG]; +}; + +static struct guc_log_buffer_entry_markers { + u32 key[2]; +} const entry_markers[GUC_LOG_BUFFER_INIT_CONFIG + 1] = { + {{ + GUC_LFD_LOG_BUFFER_MARKER_1V2, + GUC_LFD_LOG_BUFFER_MARKER_2 + }}, + {{ + GUC_LFD_LOG_BUFFER_MARKER_1V2, + GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2 + }}, + {{ + GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2, + GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2 + }}, + {{ + GUC_LIC_MAGIC, + (FIELD_PREP_CONST(GUC_LIC_VERSION_MASK_MAJOR, GUC_LIC_VERSION_MAJOR) | + FIELD_PREP_CONST(GUC_LIC_VERSION_MASK_MINOR, GUC_LIC_VERSION_MINOR)) + }} +}; + +static struct guc_log_lic_lfd_map { + u32 lic; + u32 lfd; +} const lic_lfd_type_map[] = { + {GUC_LIC_TYPE_GUC_SW_VERSION, GUC_LFD_TYPE_FW_VERSION}, + {GUC_LIC_TYPE_GUC_DEVICE_ID, GUC_LFD_TYPE_GUC_DEVICE_ID}, + {GUC_LIC_TYPE_TSC_FREQUENCY, GUC_LFD_TYPE_TSC_FREQUENCY}, + {GUC_LIC_TYPE_GMD_ID, GUC_LFD_TYPE_GMD_ID}, + {GUC_LIC_TYPE_BUILD_PLATFORM_ID, GUC_LFD_TYPE_BUILD_PLATFORM_ID} +}; + static struct xe_guc * log_to_guc(struct xe_guc_log *log) { @@ -37,33 +110,6 @@ log_to_xe(struct xe_guc_log *log) return gt_to_xe(log_to_gt(log)); } -static size_t guc_log_size(void) -{ - /* - * GuC Log buffer Layout - * - * +===============================+ 00B - * | Crash dump state header | - * +-------------------------------+ 32B - * | Debug state header | - * +-------------------------------+ 64B - * | Capture state header | - * +-------------------------------+ 96B - * | | - * +===============================+ PAGE_SIZE (4KB) - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE - * | Debug logs | - * +===============================+ + DEBUG_SIZE - * | Capture logs | - * +===============================+ + CAPTURE_SIZE - */ - return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + - CAPTURE_BUFFER_SIZE; -} - -#define GUC_LOG_CHUNK_SIZE SZ_2M - static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic) { struct xe_guc_log_snapshot *snapshot; @@ -145,7 +191,6 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, struct xe_device *xe = log_to_xe(log); struct xe_guc *guc = log_to_guc(log); struct xe_gt *gt = log_to_gt(log); - unsigned int fw_ref; size_t remain; int i; @@ -165,13 +210,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, remain -= size; } - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) { + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) snapshot->stamp = ~0ULL; - } else { + else snapshot->stamp = xe_mmio_read64_2x32(>->mmio, GUC_PMTIMESTAMP_LO); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } + snapshot->ktime = ktime_get_boottime_ns(); snapshot->level = log->level; snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE]; @@ -216,6 +260,318 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ } } +static inline void lfd_output_binary(struct drm_printer *p, char *buf, int buf_size) +{ + seq_write(p->arg, buf, buf_size); +} + +static inline int xe_guc_log_add_lfd_header(struct guc_lfd_data *lfd) +{ + lfd->header = FIELD_PREP_CONST(GUC_LFD_DATA_HEADER_MASK_MAGIC, GUC_LFD_DATA_HEADER_MAGIC); + return offsetof(struct guc_lfd_data, data); +} + +static int xe_guc_log_add_typed_payload(struct drm_printer *p, u32 type, + u32 data_len, void *data) +{ + 
struct guc_lfd_data lfd; + int len; + + len = xe_guc_log_add_lfd_header(&lfd); + lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, type); + /* make length DW aligned */ + lfd.data_count = DIV_ROUND_UP(data_len, sizeof(u32)); + lfd_output_binary(p, (char *)&lfd, len); + + lfd_output_binary(p, data, data_len); + len += lfd.data_count * sizeof(u32); + + return len; +} + +static inline int lic_type_to_index(u32 lic_type) +{ + XE_WARN_ON(lic_type < GUC_LIC_TYPE_FIRST || lic_type > GUC_LIC_TYPE_LAST); + + return lic_type - GUC_LIC_TYPE_FIRST; +} + +static inline int lfd_type_to_index(u32 lfd_type) +{ + int i, lic_type = 0; + + XE_WARN_ON(lfd_type < GUC_LFD_TYPE_FW_RANGE_FIRST || lfd_type > GUC_LFD_TYPE_FW_RANGE_LAST); + + for (i = 0; i < ARRAY_SIZE(lic_lfd_type_map); i++) + if (lic_lfd_type_map[i].lfd == lfd_type) + lic_type = lic_lfd_type_map[i].lic; + + /* If not found, lic_type_to_index will warning invalid type */ + return lic_type_to_index(lic_type); +} + +static int xe_guc_log_add_klv(struct drm_printer *p, u32 lfd_type, + struct guc_lic_save *config) +{ + int klv_index = lfd_type_to_index(lfd_type); + + return xe_guc_log_add_typed_payload(p, lfd_type, sizeof(u32), &config->values[klv_index]); +} + +static int xe_guc_log_add_os_id(struct drm_printer *p, u32 id) +{ + struct guc_lfd_data_os_info os_id; + struct guc_lfd_data lfd; + int len, info_len, section_len; + char *version; + u32 blank = 0; + + len = xe_guc_log_add_lfd_header(&lfd); + lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_OS_ID); + + os_id.os_id = id; + section_len = offsetof(struct guc_lfd_data_os_info, build_version); + + version = init_utsname()->release; + info_len = strlen(version); + + /* make length DW aligned */ + lfd.data_count = DIV_ROUND_UP(section_len + info_len, sizeof(u32)); + lfd_output_binary(p, (char *)&lfd, len); + lfd_output_binary(p, (char *)&os_id, section_len); + lfd_output_binary(p, version, info_len); + + /* Padding with 0 */ + section_len = lfd.data_count * sizeof(u32) - section_len - info_len; + if (section_len) + lfd_output_binary(p, (char *)&blank, section_len); + + len += lfd.data_count * sizeof(u32); + return len; +} + +static void xe_guc_log_loop_log_init(struct guc_lic *init, struct guc_lic_save *config) +{ + struct guc_klv_generic_dw_t *p = (void *)init->data; + int i; + + for (i = 0; i < init->data_count;) { + int klv_len = FIELD_GET(GUC_KLV_0_LEN, p->kl) + 1; + int key = FIELD_GET(GUC_KLV_0_KEY, p->kl); + + if (key < GUC_LIC_TYPE_FIRST || key > GUC_LIC_TYPE_LAST) { + XE_WARN_ON(key < GUC_LIC_TYPE_FIRST || key > GUC_LIC_TYPE_LAST); + break; + } + config->values[lic_type_to_index(key)] = p->value; + i += klv_len + 1; /* Whole KLV structure length in dwords */ + p = (void *)((u32 *)p + klv_len); + } +} + +static int find_marker(u32 mark0, u32 mark1) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(entry_markers); i++) + if (mark0 == entry_markers[i].key[0] && mark1 == entry_markers[i].key[1]) + return i; + + return ARRAY_SIZE(entry_markers); +} + +static void xe_guc_log_load_lic(void *guc_log, struct guc_lic_save *config) +{ + u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH; + struct guc_log_buffer_state *p = guc_log; + + config->version = p->version; + while (p->marker[0]) { + int index; + + index = find_marker(p->marker[0], p->marker[1]); + + if (index < ARRAY_SIZE(entry_markers)) { + if (index == GUC_LOG_BUFFER_INIT_CONFIG) { + /* Load log init config */ + xe_guc_log_loop_log_init((void *)p, config); + + /* LIC structure is the last */ + return; + } + 
config->entry[index].offset = offset; + config->entry[index].rd_ptr = p->read_ptr; + config->entry[index].wr_ptr = p->write_ptr; + config->entry[index].wrap_offset = p->wrap_offset; + config->entry[index].buf_size = p->size; + } + offset += p->size; + p++; + } +} + +static int +xe_guc_log_output_lfd_init(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot, + struct guc_lic_save *config) +{ + int type, len; + size_t size = 0; + + /* FW required types */ + for (type = GUC_LFD_TYPE_FW_RANGE_FIRST; type <= GUC_LFD_TYPE_FW_RANGE_LAST; type++) + size += xe_guc_log_add_klv(p, type, config); + + /* KMD required type(s) */ + len = xe_guc_log_add_os_id(p, GUC_LFD_OS_TYPE_OSID_LIN); + size += len; + + return size; +} + +static void +xe_guc_log_print_chunks(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot, + u32 from, u32 to) +{ + int chunk_from = from % GUC_LOG_CHUNK_SIZE; + int chunk_id = from / GUC_LOG_CHUNK_SIZE; + int to_chunk_id = to / GUC_LOG_CHUNK_SIZE; + int chunk_to = to % GUC_LOG_CHUNK_SIZE; + int pos = from; + + do { + size_t size = (to_chunk_id == chunk_id ? chunk_to : GUC_LOG_CHUNK_SIZE) - + chunk_from; + + lfd_output_binary(p, snapshot->copy[chunk_id] + chunk_from, size); + pos += size; + chunk_id++; + chunk_from = 0; + } while (pos < to); +} + +static inline int +xe_guc_log_add_log_event(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot, + struct guc_lic_save *config) +{ + size_t size; + u32 data_len, section_len; + struct guc_lfd_data lfd; + struct guc_log_buffer_entry_list *entry; + struct guc_lfd_data_log_events_buf events_buf; + + entry = &config->entry[GUC_LOG_TYPE_EVENT_DATA]; + + /* Skip empty log */ + if (entry->rd_ptr == entry->wr_ptr) + return 0; + + size = xe_guc_log_add_lfd_header(&lfd); + lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_LOG_EVENTS_BUFFER); + events_buf.log_events_format_version = config->version; + + /* Adjust to log_format_buf */ + section_len = offsetof(struct guc_lfd_data_log_events_buf, log_event); + data_len = section_len; + + /* Calculate data length */ + data_len += entry->rd_ptr < entry->wr_ptr ? 
(entry->wr_ptr - entry->rd_ptr) : + (entry->wr_ptr + entry->wrap_offset - entry->rd_ptr); + /* make length u32 aligned */ + lfd.data_count = DIV_ROUND_UP(data_len, sizeof(u32)); + + /* Output GUC_LFD_TYPE_LOG_EVENTS_BUFFER header */ + lfd_output_binary(p, (char *)&lfd, size); + lfd_output_binary(p, (char *)&events_buf, section_len); + + /* Output data from guc log chunks directly */ + if (entry->rd_ptr < entry->wr_ptr) { + xe_guc_log_print_chunks(p, snapshot, entry->offset + entry->rd_ptr, + entry->offset + entry->wr_ptr); + } else { + /* 1st, print from rd to wrap offset */ + xe_guc_log_print_chunks(p, snapshot, entry->offset + entry->rd_ptr, + entry->offset + entry->wrap_offset); + + /* 2nd, print from buf start to wr */ + xe_guc_log_print_chunks(p, snapshot, entry->offset, entry->offset + entry->wr_ptr); + } + return size; +} + +static int +xe_guc_log_add_crash_dump(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot, + struct guc_lic_save *config) +{ + struct guc_log_buffer_entry_list *entry; + int chunk_from, chunk_id; + int from, to, i; + size_t size = 0; + u32 *buf32; + + entry = &config->entry[GUC_LOG_TYPE_CRASH_DUMP]; + + /* Skip zero sized crash dump */ + if (!entry->buf_size) + return 0; + + /* Check if crash dump section are all zero */ + from = entry->offset; + to = entry->offset + entry->buf_size; + chunk_from = from % GUC_LOG_CHUNK_SIZE; + chunk_id = from / GUC_LOG_CHUNK_SIZE; + buf32 = snapshot->copy[chunk_id] + chunk_from; + + for (i = 0; i < entry->buf_size / sizeof(u32); i++) + if (buf32[i]) + break; + + /* Buffer has non-zero data? */ + if (i < entry->buf_size / sizeof(u32)) { + struct guc_lfd_data lfd; + + size = xe_guc_log_add_lfd_header(&lfd); + lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_FW_CRASH_DUMP); + /* Calculate data length */ + lfd.data_count = DIV_ROUND_UP(entry->buf_size, sizeof(u32)); + /* Output GUC_LFD_TYPE_FW_CRASH_DUMP header */ + lfd_output_binary(p, (char *)&lfd, size); + + /* rd/wr ptr is not used for crash dump */ + xe_guc_log_print_chunks(p, snapshot, from, to); + } + return size; +} + +static void +xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p) +{ + struct guc_lfd_file_header header; + struct guc_lic_save config; + size_t size; + + if (!snapshot || !snapshot->size) + return; + + header.magic = GUC_LFD_DRIVER_KEY_STREAMING; + header.version = FIELD_PREP_CONST(GUC_LFD_FILE_HEADER_VERSION_MASK_MINOR, + GUC_LFD_FORMAT_VERSION_MINOR) | + FIELD_PREP_CONST(GUC_LFD_FILE_HEADER_VERSION_MASK_MAJOR, + GUC_LFD_FORMAT_VERSION_MAJOR); + + /* Output LFD file header */ + lfd_output_binary(p, (char *)&header, + offsetof(struct guc_lfd_file_header, stream)); + + /* Output LFD stream */ + xe_guc_log_load_lic(snapshot->copy[0], &config); + size = xe_guc_log_output_lfd_init(p, snapshot, &config); + if (!size) + return; + + xe_guc_log_add_log_event(p, snapshot, &config); + xe_guc_log_add_crash_dump(p, snapshot, &config); +} + /** * xe_guc_log_print_dmesg - dump a copy of the GuC log to dmesg * @log: GuC log structure @@ -251,13 +607,27 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) xe_guc_log_snapshot_free(snapshot); } +/** + * xe_guc_log_print_lfd - dump a copy of the GuC log in LFD format + * @log: GuC log structure + * @p: the printer object to output to + */ +void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p) +{ + struct xe_guc_log_snapshot *snapshot; + + snapshot = xe_guc_log_snapshot_capture(log, false); + 
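+ /* Capture may fail and return NULL; xe_guc_log_snapshot_print_lfd() checks for that before emitting anything */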
xe_guc_log_snapshot_print_lfd(snapshot, p); + xe_guc_log_snapshot_free(snapshot); +} + int xe_guc_log_init(struct xe_guc_log *log) { struct xe_device *xe = log_to_xe(log); struct xe_tile *tile = gt_to_tile(log_to_gt(log)); struct xe_bo *bo; - bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), + bo = xe_managed_bo_create_pin_map(xe, tile, GUC_LOG_SIZE, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE | @@ -265,7 +635,7 @@ int xe_guc_log_init(struct xe_guc_log *log) if (IS_ERR(bo)) return PTR_ERR(bo); - xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size()); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); log->bo = bo; log->level = xe_modparam.guc_log_level; @@ -274,71 +644,6 @@ int xe_guc_log_init(struct xe_guc_log *log) ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */ -static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log) -{ - return CRASH_BUFFER_SIZE; -} - -static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log) -{ - return DEBUG_BUFFER_SIZE; -} - -/** - * xe_guc_log_section_size_capture - Get capture buffer size within log sections. - * @log: The log object. - * - * This function will return the capture buffer size within log sections. - * - * Return: capture buffer size. - */ -u32 xe_guc_log_section_size_capture(struct xe_guc_log *log) -{ - return CAPTURE_BUFFER_SIZE; -} - -/** - * xe_guc_get_log_buffer_size - Get log buffer size for a type. - * @log: The log object. - * @type: The log buffer type - * - * Return: buffer size. - */ -u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type) -{ - switch (type) { - case GUC_LOG_BUFFER_CRASH_DUMP: - return xe_guc_log_section_size_crash(log); - case GUC_LOG_BUFFER_DEBUG: - return xe_guc_log_section_size_debug(log); - case GUC_LOG_BUFFER_CAPTURE: - return xe_guc_log_section_size_capture(log); - } - return 0; -} - -/** - * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type. - * @log: The log object. - * @type: The log buffer type - * - * This function will return the offset in the log buffer for a type. - * Return: buffer offset. - */ -u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type) -{ - enum guc_log_buffer_type i; - u32 offset = PAGE_SIZE;/* for the log_buffer_states */ - - for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) { - if (i == type) - break; - offset += xe_guc_get_log_buffer_size(log, i); - } - - return offset; -} - /** * xe_guc_check_log_buf_overflow - Check if log buffer overflowed * @log: The log object. @@ -352,7 +657,7 @@ u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_typ * * Return: True if overflowed. 
*/ -bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type, +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_type type, unsigned int full_cnt) { unsigned int prev_full_cnt = log->stats[type].sampled_overflow; diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 98a47ac42b08..1b05bb60c1c7 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -13,14 +13,26 @@ struct drm_printer; struct xe_device; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) -#define CRASH_BUFFER_SIZE SZ_1M -#define DEBUG_BUFFER_SIZE SZ_8M -#define CAPTURE_BUFFER_SIZE SZ_2M +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M +#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M +#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_2M #else -#define CRASH_BUFFER_SIZE SZ_16K -#define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_1M +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_64K +#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_16K +#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_1M #endif + +#define GUC_LOG_SIZE (SZ_4K + \ + XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE + \ + XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE + \ + XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE) + +#define XE_GUC_LOG_EVENT_DATA_OFFSET SZ_4K +#define XE_GUC_LOG_CRASH_DUMP_OFFSET (XE_GUC_LOG_EVENT_DATA_OFFSET + \ + XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE) +#define XE_GUC_LOG_STATE_CAPTURE_OFFSET (XE_GUC_LOG_CRASH_DUMP_OFFSET + \ + XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE) + /* * While we're using plain log level in i915, GuC controls are much more... * "elaborate"? We have a couple of bits for verbosity, separate bit for actual @@ -40,6 +52,7 @@ struct xe_device; int xe_guc_log_init(struct xe_guc_log *log); void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p); +void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p); void xe_guc_log_print_dmesg(struct xe_guc_log *log); struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic); void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p); @@ -51,11 +64,8 @@ xe_guc_log_get_level(struct xe_guc_log *log) return log->level; } -u32 xe_guc_log_section_size_capture(struct xe_guc_log *log); -u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type); -u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type); bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, - enum guc_log_buffer_type type, + enum guc_log_type type, unsigned int full_cnt); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 951a49fb1d3e..54702a0fd05b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -76,7 +76,7 @@ * exposes a programming interface to the host for the control of SLPC. * * Frequency management: - * ===================== + * --------------------- * * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. @@ -87,7 +87,7 @@ * for any workload. * * Render-C States: - * ================ + * ---------------- * * Render-C states is also a GuC PC feature that is now enabled in Xe for * all platforms. 
@@ -499,21 +499,17 @@ u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc) int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) { struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) return -ETIMEDOUT; - } *freq = get_cur_freq(gt); - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -1087,13 +1083,8 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) */ int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) { - int ret; - - xe_pm_runtime_get(pc_to_xe(pc)); - ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); - xe_pm_runtime_put(pc_to_xe(pc)); - - return ret; + guard(xe_pm_runtime)(pc_to_xe(pc)); + return pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); } /** @@ -1104,13 +1095,8 @@ int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mod */ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) { - int ret; - - xe_pm_runtime_get(pc_to_xe(pc)); - ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); - xe_pm_runtime_put(pc_to_xe(pc)); - - return ret; + guard(xe_pm_runtime)(pc_to_xe(pc)); + return pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); } static void pc_init_pcode_freq(struct xe_guc_pc *pc) @@ -1198,7 +1184,7 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) return -EINVAL; guard(mutex)(&pc->freq_lock); - xe_pm_runtime_get_noresume(pc_to_xe(pc)); + guard(xe_pm_runtime_noresume)(pc_to_xe(pc)); ret = pc_action_set_param(pc, SLPC_PARAM_POWER_PROFILE, @@ -1209,8 +1195,6 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) else pc->power_profile = val; - xe_pm_runtime_put(pc_to_xe(pc)); - return ret; } @@ -1223,17 +1207,14 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); - unsigned int fw_ref; ktime_t earlier; int ret; xe_gt_assert(gt, xe_device_uc_enabled(xe)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) return -ETIMEDOUT; - } if (xe->info.skip_guc_pc) { if (xe->info.platform != XE_PVC) @@ -1241,9 +1222,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) /* Request max possible since dynamic freq mgmt is not enabled */ pc_set_cur_freq(pc, UINT_MAX); - - ret = 0; - goto out; + return 0; } xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); @@ -1252,7 +1231,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) earlier = ktime_get(); ret = pc_action_reset(pc); if (ret) - goto out; + return ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, SLPC_RESET_TIMEOUT_MS)) { @@ -1263,8 +1242,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, SLPC_RESET_EXTENDED_TIMEOUT_MS)) { xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); - ret = -EIO; - goto out; + return -EIO; } xe_gt_warn(gt, "GuC PC excessive start time: %lldms", @@ -1273,21 +1251,20 @@ int 
xe_guc_pc_start(struct xe_guc_pc *pc) ret = pc_init_freqs(pc); if (ret) - goto out; + return ret; ret = pc_set_mert_freq_cap(pc); if (ret) - goto out; + return ret; if (xe->info.platform == XE_PVC) { xe_guc_pc_gucrc_disable(pc); - ret = 0; - goto out; + return 0; } ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); if (ret) - goto out; + return ret; /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); @@ -1297,8 +1274,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (unlikely(ret)) xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); -out: - xe_force_wake_put(gt_to_fw(gt), fw_ref); return ret; } @@ -1330,19 +1305,16 @@ static void xe_guc_pc_fini_hw(void *arg) { struct xe_guc_pc *pc = arg; struct xe_device *xe = pc_to_xe(pc); - unsigned int fw_ref; if (xe_device_wedged(xe)) return; - fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc))); - - xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index f6ba2b0f074d..0b590271c326 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -19,6 +19,7 @@ #include "abi/guc_klvs_abi.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" +#include "xe_bo.h" #include "xe_devcoredump.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -47,6 +48,8 @@ #include "xe_uc_fw.h" #include "xe_vm.h" +#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 + static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { @@ -72,6 +75,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12) #define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13) +#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND (1 << 14) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -263,6 +267,21 @@ static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q) atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state); } +static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q) +{ + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND; +} + +static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); +} + +static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q) +{ + atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state); +} + static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return (atomic_read(&q->guc->state) & @@ -541,7 +560,8 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) u32 slpc_exec_queue_freq_req = 0; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) && + !xe_exec_queue_is_multi_queue_secondary(q)); if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; @@ -561,6 +581,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue { struct exec_queue_policy policy; + xe_assert(guc_to_xe(guc), 
!xe_exec_queue_is_multi_queue_secondary(q)); + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); @@ -568,6 +590,89 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue __guc_exec_queue_policy_action_size(&policy), 0, 0); } +static bool vf_recovery(struct xe_guc *guc) +{ + return xe_gt_recovery_pending(guc_to_gt(guc)); +} + +static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ + wake_up_all(&xe->ufence_wq); + + if (xe_exec_queue_is_lr(q)) + queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); + else + xe_sched_tdr_queue_imm(&q->guc->sched); +} + +static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + /* Group banned, skip timeout check in TDR */ + WRITE_ONCE(group->banned, true); + xe_guc_exec_queue_trigger_cleanup(primary); + + mutex_lock(&group->list_lock); + list_for_each_entry(eq, &group->list, multi_queue.link) + xe_guc_exec_queue_trigger_cleanup(eq); + mutex_unlock(&group->list_lock); +} + +static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q) +{ + if (xe_exec_queue_is_multi_queue(q)) { + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + /* Group banned, skip timeout check in TDR */ + WRITE_ONCE(group->banned, true); + + set_exec_queue_reset(primary); + if (!exec_queue_banned(primary) && !exec_queue_check_timeout(primary)) + xe_guc_exec_queue_trigger_cleanup(primary); + + mutex_lock(&group->list_lock); + list_for_each_entry(eq, &group->list, multi_queue.link) { + set_exec_queue_reset(eq); + if (!exec_queue_banned(eq) && !exec_queue_check_timeout(eq)) + xe_guc_exec_queue_trigger_cleanup(eq); + } + mutex_unlock(&group->list_lock); + } else { + set_exec_queue_reset(q); + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) + xe_guc_exec_queue_trigger_cleanup(q); + } +} + +static void set_exec_queue_group_banned(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + /* Ban all queues of the multi-queue group */ + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + set_exec_queue_banned(primary); + + mutex_lock(&group->list_lock); + list_for_each_entry(eq, &group->list, multi_queue.link) + set_exec_queue_banned(eq); + mutex_unlock(&group->list_lock); +} + #define parallel_read(xe_, map_, field_) \ xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_) @@ -575,6 +680,181 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_, val_) +/** + * DOC: Multi Queue Group GuC interface + * + * The multi queue group coordination between KMD and GuC is through a software + * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page + * allocated in the global GTT. 
+ * + * CGP format: + * + * +-----------+---------------------------+---------------------------------------------+ + * | DWORD | Name | Description | + * +-----------+---------------------------+---------------------------------------------+ + * | 0 | Version | Bits [15:8]=Major ver, [7:0]=Minor ver | + * +-----------+---------------------------+---------------------------------------------+ + * | 1..15 | RESERVED | MBZ | + * +-----------+---------------------------+---------------------------------------------+ + * | 16 | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0 | + * +-----------+---------------------------+---------------------------------------------+ + * | 17 | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32 | + * +-----------+---------------------------+---------------------------------------------+ + * | 18..31 | RESERVED | MBZ | + * +-----------+---------------------------+---------------------------------------------+ + * | 32 | Q0CD_DW0 | Queue 0 context LRC descriptor lower DWORD | + * +-----------+---------------------------+---------------------------------------------+ + * | 33 | Q0ContextIndex | Context ID for Queue 0 | + * +-----------+---------------------------+---------------------------------------------+ + * | 34 | Q1CD_DW0 | Queue 1 context LRC descriptor lower DWORD | + * +-----------+---------------------------+---------------------------------------------+ + * | 35 | Q1ContextIndex | Context ID for Queue 1 | + * +-----------+---------------------------+---------------------------------------------+ + * | ... | ... | ... | + * +-----------+---------------------------+---------------------------------------------+ + * | 158 | Q63CD_DW0 | Queue 63 context LRC descriptor lower DWORD | + * +-----------+---------------------------+---------------------------------------------+ + * | 159 | Q63ContextIndex | Context ID for Queue 63 | + * +-----------+---------------------------+---------------------------------------------+ + * | 160..1024 | RESERVED | MBZ | + * +-----------+---------------------------+---------------------------------------------+ + * + * While registering Q0 with the GuC, the CGP is updated with the Q0 entry and the GuC is + * notified through the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message, which + * specifies the CGP address. When secondary queues are added to the group, the CGP is + * updated with the entry for that queue and the GuC is notified through the H2G interface + * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. The GuC responds to these H2G messages + * with a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE G2H message. The GuC also + * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any + * error in the CGP. Only one of these CGP update messages can be outstanding + * (waiting for a GuC response) at any time. The bits in the KMD_QUEUE_UPDATE_MASK_DW* + * fields indicate which queue entry is being updated in the CGP. + * + * The primary queue (Q0) represents the multi queue group context in the GuC, and + * submission on any queue of the group must go through the Q0 GuC interface only. + * + * As it is not required to register secondary queues with the GuC, the secondary queue + * context IDs in the CGP are populated with the Q0 context ID. 
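+ * + * For a queue at position N in the group (0 <= N <= 63), the entries above reduce to: QnCD_DW0 at dword (32 + 2 * N) and QnContextIndex at dword (33 + 2 * N), with the matching update bit BIT(N % 32) set in KMD_QUEUE_UPDATE_MASK_DW0 for N < 32 or in KMD_QUEUE_UPDATE_MASK_DW1 for N >= 32.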
+ */ + +#define CGP_VERSION_MAJOR_SHIFT 8 + +static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe, + struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 guc_id = group->primary->guc->id; + + /* Currently implementing CGP version 1.0 */ + xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32, + 1 << CGP_VERSION_MAJOR_SHIFT); + + xe_map_wr(xe, &group->cgp_bo->vmap, + (32 + q->multi_queue.pos * 2) * sizeof(u32), + u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0]))); + + xe_map_wr(xe, &group->cgp_bo->vmap, + (33 + q->multi_queue.pos * 2) * sizeof(u32), + u32, guc_id); + + if (q->multi_queue.pos / 32) { + xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), + u32, BIT(q->multi_queue.pos % 32)); + xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0); + } else { + xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), + u32, BIT(q->multi_queue.pos)); + xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0); + } +} + +static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, + struct xe_exec_queue *q, + const u32 *action, u32 len) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_device *xe = guc_to_xe(guc); + long ret; + + /* + * As all queues of a multi queue group use single drm scheduler + * submit workqueue, CGP synchronization with GuC are serialized. + * Hence, no locking is required here. + * Wait for any pending CGP_SYNC_DONE response before updating the + * CGP page and sending CGP_SYNC message. + * + * FIXME: Support VF migration + */ + ret = wait_event_timeout(guc->ct.wq, + !READ_ONCE(group->sync_pending) || + xe_guc_read_stopped(guc), HZ); + if (!ret || xe_guc_read_stopped(guc)) { + /* CGP_SYNC failed. Reset gt, cleanup the group */ + xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n"); + set_exec_queue_group_banned(q); + xe_gt_reset_async(q->gt); + xe_guc_exec_queue_group_trigger_cleanup(q); + return; + } + + xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); + xe_guc_exec_queue_group_cgp_update(xe, q); + + WRITE_ONCE(group->sync_pending, true); + xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1); +} + +static void __register_exec_queue_group(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) +{ +#define MAX_MULTI_QUEUE_REG_SIZE (8) + u32 action[MAX_MULTI_QUEUE_REG_SIZE]; + int len = 0; + + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = 0; /* Reserved */ + action[len++] = info->cgp_lo; + action[len++] = info->cgp_hi; + + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE); +#undef MAX_MULTI_QUEUE_REG_SIZE + + /* + * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a + * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response + * from guc. 
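+ * The DONE response is handled by xe_guc_exec_queue_cgp_sync_done_handler(), which clears the group's sync_pending flag and wakes the waiter in xe_guc_exec_queue_group_cgp_sync().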
+ */ + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); +} + +static void xe_guc_exec_queue_group_add(struct xe_guc *guc, + struct xe_exec_queue *q) +{ +#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) + u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; + int len = 0; + + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q)); + + action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; + action[len++] = q->multi_queue.group->primary->guc->id; + + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); +#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE + + /* + * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a + * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response + * from guc. + */ + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); +} + static void __register_mlrc_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q, struct guc_ctxt_registration_info *info) @@ -670,6 +950,13 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) info.flags = CONTEXT_REGISTRATION_FLAG_KMD | FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); + if (xe_exec_queue_is_multi_queue(q)) { + struct xe_exec_queue_group *group = q->multi_queue.group; + + info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo); + info.cgp_hi = 0; + } + if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); struct iosys_map map = xe_lrc_parallel_map(lrc); @@ -700,11 +987,18 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); - if (xe_exec_queue_is_parallel(q)) + if (xe_exec_queue_is_multi_queue_primary(q)) + __register_exec_queue_group(guc, q, &info); + else if (xe_exec_queue_is_parallel(q)) __register_mlrc_exec_queue(guc, q, &info); - else + else if (!xe_exec_queue_is_multi_queue_secondary(q)) __register_exec_queue(guc, &info); - init_policies(guc, q); + + if (!xe_exec_queue_is_multi_queue_secondary(q)) + init_policies(guc, q); + + if (xe_exec_queue_is_multi_queue_secondary(q)) + xe_guc_exec_queue_group_add(guc, q); } static u32 wq_space_until_wrap(struct xe_exec_queue *q) @@ -712,11 +1006,6 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } -static bool vf_recovery(struct xe_guc *guc) -{ - return xe_gt_recovery_pending(guc_to_gt(guc)); -} - static inline void relaxed_ms_sleep(unsigned int delay_ms) { unsigned long min_us, max_us; @@ -845,7 +1134,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) if (!job->restore_replay || job->last_replay) { if (xe_exec_queue_is_parallel(q)) wq_item_append(q); - else + else if (!exec_queue_idle_skip_suspend(q)) xe_lrc_set_ring_tail(lrc, lrc->ring.tail); job->last_replay = false; } @@ -853,6 +1142,12 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; + /* + * All queues in a multi-queue group will use the primary queue + * of the group to interface with GuC. 
+ */ + q = xe_exec_queue_multi_queue_primary(q); + if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; action[len++] = q->guc->id; @@ -899,6 +1194,18 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) trace_xe_sched_job_run(job); if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + + if (exec_queue_killed_or_banned_or_wedged(primary)) { + killed_or_banned_or_wedged = true; + goto run_job_out; + } + + if (!exec_queue_registered(primary)) + register_exec_queue(primary, GUC_CONTEXT_NORMAL); + } + if (!exec_queue_registered(q)) register_exec_queue(q, GUC_CONTEXT_NORMAL); if (!job->restore_replay) @@ -907,6 +1214,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) job->restore_replay = false; } +run_job_out: /* * We don't care about job-fence ordering in LR VMs because these fences * are never exported; they are used solely to keep jobs on the pending @@ -932,6 +1240,11 @@ int xe_guc_read_stopped(struct xe_guc *guc) return atomic_read(&guc->submission_state.stopped); } +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, + struct xe_exec_queue *q, + u32 runnable_state); +static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q); + #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ u32 action[] = { \ XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ @@ -945,7 +1258,9 @@ static void disable_scheduling_deregister(struct xe_guc *guc, MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); int ret; - set_min_preemption_timeout(guc, q); + if (!xe_exec_queue_is_multi_queue_secondary(q)) + set_min_preemption_timeout(guc, q); + smp_rmb(); ret = wait_event_timeout(guc->ct.wq, (!exec_queue_pending_enable(q) && @@ -973,23 +1288,12 @@ static void disable_scheduling_deregister(struct xe_guc *guc, * Reserve space for both G2H here as the 2nd G2H is sent from a G2H * handler and we are not allowed to reserved G2H space in handlers. 
*/ - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + - G2H_LEN_DW_DEREGISTER_CONTEXT, 2); -} - -static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ - wake_up_all(&xe->ufence_wq); - - if (xe_exec_queue_is_lr(q)) - queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); + if (xe_exec_queue_is_multi_queue_secondary(q)) + handle_multi_queue_secondary_sched_done(guc, q, 0); else - xe_sched_tdr_queue_imm(&q->guc->sched); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + + G2H_LEN_DW_DEREGISTER_CONTEXT, 2); } /** @@ -1181,8 +1485,11 @@ static void enable_scheduling(struct xe_exec_queue *q) set_exec_queue_enabled(q); trace_xe_exec_queue_scheduling_enable(q); - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + if (xe_exec_queue_is_multi_queue_secondary(q)) + handle_multi_queue_secondary_sched_done(guc, q, 1); + else + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || @@ -1206,14 +1513,17 @@ static void disable_scheduling(struct xe_exec_queue *q, bool immediate) xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - if (immediate) + if (immediate && !xe_exec_queue_is_multi_queue_secondary(q)) set_min_preemption_timeout(guc, q); clear_exec_queue_enabled(q); set_exec_queue_pending_disable(q); trace_xe_exec_queue_scheduling_disable(q); - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + if (xe_exec_queue_is_multi_queue_secondary(q)) + handle_multi_queue_secondary_sched_done(guc, q, 0); + else + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); } static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1231,8 +1541,11 @@ static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) set_exec_queue_destroyed(q); trace_xe_exec_queue_deregister(q); - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_DEREGISTER_CONTEXT, 1); + if (xe_exec_queue_is_multi_queue_secondary(q)) + handle_deregister_done(guc, q); + else + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, 1); } static enum drm_gpu_sched_stat @@ -1245,7 +1558,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; struct xe_device *xe = guc_to_xe(guc); - unsigned int fw_ref; int err = -ETIME; pid_t pid = -1; int i = 0; @@ -1271,6 +1583,19 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) exec_queue_killed_or_banned_or_wedged(q) || exec_queue_destroyed(q); + /* Skip timeout check if multi-queue group is banned */ + if (xe_exec_queue_is_multi_queue(q) && + READ_ONCE(q->multi_queue.group->banned)) + skip_timeout_check = true; + + /* + * FIXME: In multi-queue scenario, the TDR must ensure that the whole + * multi-queue group is off the HW before signaling the fences to avoid + * possible memory corruptions. This means disabling scheduling on the + * primary queue before or during the secondary queue's TDR. Need to + * implement this in least obtrusive way. 
+ */ + /* * If devcoredump not captured and GuC capture for the job is not ready * do manual capture first and decide later if we need to use it @@ -1278,13 +1603,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!exec_queue_killed(q) && !xe->devcoredump.captured && !xe_guc_capture_get_matching_and_lock(q)) { /* take force wake before engine register manual capture */ - fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); xe_engine_snapshot_capture_for_queue(q); - - xe_force_wake_put(gt_to_fw(q->gt), fw_ref); } /* @@ -1425,7 +1748,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - xe_guc_exec_queue_trigger_cleanup(q); + if (xe_exec_queue_is_multi_queue(q)) + xe_guc_exec_queue_group_trigger_cleanup(q); + else + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ spin_lock(&sched->base.job_list_lock); @@ -1475,17 +1801,23 @@ static void __guc_exec_queue_destroy_async(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); - xe_pm_runtime_get(guc_to_xe(guc)); + guard(xe_pm_runtime)(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue_group *group = q->multi_queue.group; + + mutex_lock(&group->list_lock); + list_del(&q->multi_queue.link); + mutex_unlock(&group->list_lock); + } + if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); xe_exec_queue_fini(q); - - xe_pm_runtime_put(guc_to_xe(guc)); } static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) @@ -1590,9 +1922,10 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); + bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); - if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && - exec_queue_enabled(q)) { + if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) && + !exec_queue_suspended(q) && exec_queue_enabled(q)) { wait_event(guc->ct.wq, vf_recovery(guc) || ((q->guc->resume_time != RESUME_PENDING || xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); @@ -1611,11 +1944,33 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) disable_scheduling(q, false); } } else if (q->guc->suspend_pending) { + if (idle_skip_suspend) + set_exec_queue_idle_skip_suspend(q); set_exec_queue_suspended(q); suspend_fence_signal(q); } } +static void sched_context(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_lrc *lrc = q->lrc[0]; + u32 action[] = { + XE_GUC_ACTION_SCHED_CONTEXT, + q->guc->id, + }; + + xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); + + trace_xe_exec_queue_submit(q); + + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 
0, 0); +} + static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; @@ -1623,19 +1978,53 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) if (guc_exec_queue_allowed_to_change_state(q)) { clear_exec_queue_suspended(q); if (!exec_queue_enabled(q)) { + if (exec_queue_idle_skip_suspend(q)) { + struct xe_lrc *lrc = q->lrc[0]; + + clear_exec_queue_idle_skip_suspend(q); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); + } q->guc->resume_time = RESUME_PENDING; set_exec_queue_pending_resume(q); enable_scheduling(q); + } else if (exec_queue_idle_skip_suspend(q)) { + clear_exec_queue_idle_skip_suspend(q); + sched_context(q); } } else { clear_exec_queue_suspended(q); + clear_exec_queue_idle_skip_suspend(q); } } -#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ -#define SET_SCHED_PROPS 2 -#define SUSPEND 3 -#define RESUME 4 +static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) +{ + struct xe_exec_queue *q = msg->private_data; + + if (guc_exec_queue_allowed_to_change_state(q)) { +#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; + int len = 0; + + action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; + action[len++] = group->primary->guc->id; + + xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); +#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE + + xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); + } + + kfree(msg); +} + +#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ +#define SET_SCHED_PROPS 2 +#define SUSPEND 3 +#define RESUME 4 +#define SET_MULTI_QUEUE_PRIORITY 5 #define OPCODE_MASK 0xf #define MSG_LOCKED BIT(8) #define MSG_HEAD BIT(9) @@ -1659,6 +2048,9 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) case RESUME: __guc_exec_queue_process_msg_resume(msg); break; + case SET_MULTI_QUEUE_PRIORITY: + __guc_exec_queue_process_msg_set_multi_queue_priority(msg); + break; default: XE_WARN_ON("Unknown message type"); } @@ -1680,6 +2072,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); + struct workqueue_struct *submit_wq = NULL; struct xe_guc_exec_queue *ge; long timeout; int err, i; @@ -1700,8 +2093,20 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); + + /* + * Use primary queue's submit_wq for all secondary queues of a + * multi queue group. This serialization avoids any locking around + * CGP synchronization with GuC. + */ + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + + submit_wq = primary->guc->sched.base.submit_wq; + } + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, + submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) @@ -1730,7 +2135,23 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) xe_exec_queue_assign_name(q, q->guc->id); - trace_xe_exec_queue_create(q); + /* + * Maintain secondary queues of the multi queue group in a list + * for handling dependencies across the queues in the group. 
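+ * The list is protected by group->list_lock; it is walked when banning or cleaning up the whole group, and the queue removes itself in __guc_exec_queue_destroy_async().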
+ */ + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue_group *group = q->multi_queue.group; + + INIT_LIST_HEAD(&q->multi_queue.link); + mutex_lock(&group->list_lock); + list_add_tail(&q->multi_queue.link, &group->list); + mutex_unlock(&group->list_lock); + } + + if (xe_exec_queue_is_multi_queue(q)) + trace_xe_exec_queue_create_multi_queue(q); + else + trace_xe_exec_queue_create(q); return 0; @@ -1862,6 +2283,27 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, return 0; } +static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, + enum xe_multi_queue_priority priority) +{ + struct xe_sched_msg *msg; + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); + + if (q->multi_queue.priority == priority || + exec_queue_killed_or_banned_or_wedged(q)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + q->multi_queue.priority = priority; + guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); + + return 0; +} + static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; @@ -1936,6 +2378,10 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) { + if (xe_exec_queue_is_multi_queue_secondary(q) && + guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q))) + return true; + return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } @@ -1953,6 +2399,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, + .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority, .suspend = guc_exec_queue_suspend, .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, @@ -2202,6 +2649,22 @@ void xe_guc_submit_pause(struct xe_guc *guc) struct xe_exec_queue *q; unsigned long index; + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xe_sched_submission_stop(&q->guc->sched); + mutex_unlock(&guc->submission_state.lock); +} + +/** + * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. + * @guc: the &xe_guc struct instance whose scheduler is to be disabled + */ +void xe_guc_submit_pause_vf(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); mutex_lock(&guc->submission_state.lock); @@ -2293,14 +2756,15 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, } /** - * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC. + * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF. 
* @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause */ -void xe_guc_submit_unpause_prepare(struct xe_guc *guc) +void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; + xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); mutex_lock(&guc->submission_state.lock); @@ -2376,6 +2840,23 @@ void xe_guc_submit_unpause(struct xe_guc *guc) struct xe_exec_queue *q; unsigned long index; + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xe_sched_submission_start(&q->guc->sched); + mutex_unlock(&guc->submission_state.lock); +} + +/** + * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF. + * @guc: the &xe_guc struct instance whose scheduler is to be enabled + */ +void xe_guc_submit_unpause_vf(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); + mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { /* @@ -2452,7 +2933,11 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) trace_xe_exec_queue_deregister(q); - xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); + if (xe_exec_queue_is_multi_queue_secondary(q)) + handle_deregister_done(guc, q); + else + xe_guc_ct_send_g2h_handler(&guc->ct, action, + ARRAY_SIZE(action)); } static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, @@ -2502,6 +2987,16 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, } } +static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, + struct xe_exec_queue *q, + u32 runnable_state) +{ + /* Take CT lock here as handle_sched_done() do send a h2g message */ + mutex_lock(&guc->ct.lock); + handle_sched_done(guc, q, runnable_state); + mutex_unlock(&guc->ct.lock); +} + int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_exec_queue *q; @@ -2585,8 +3080,9 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) if (unlikely(!q)) return -EPROTO; - xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, + atomic_read(&q->guc->state)); trace_xe_exec_queue_reset(q); @@ -2596,9 +3092,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) * jobs by setting timeout of the job to the minimum value kicking * guc_exec_queue_timedout_job. */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); + xe_guc_exec_queue_reset_trigger_cleanup(q); return 0; } @@ -2666,20 +3160,18 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, * See bspec 54047 and 72187 for details. 
*/ if (type != XE_GUC_CAT_ERR_TYPE_INVALID) - xe_gt_dbg(gt, - "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", - type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + xe_gt_info(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); else - xe_gt_dbg(gt, - "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + xe_gt_info(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); /* Treat the same as engine reset */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); + xe_guc_exec_queue_reset_trigger_cleanup(q); return 0; } @@ -2706,6 +3198,73 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le return 0; } +int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, + u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[2]; + + if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + xe_gt_dbg(gt, + "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", + msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); + + trace_xe_exec_queue_cgp_context_error(q); + + /* Treat the same as engine reset */ + xe_guc_exec_queue_reset_trigger_cleanup(q); + + return 0; +} + +/** + * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler + * @guc: guc + * @msg: message indicating CGP sync done + * @len: length of message + * + * Set multi queue group's sync_pending flag to false and wakeup anyone waiting + * for CGP synchronization to complete. + * + * Return: 0 on success, -EPROTO for malformed messages. 
+ */ +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + if (!xe_exec_queue_is_multi_queue_primary(q)) { + drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); + return -EPROTO; + } + + /* Wakeup the serialized cgp update wait */ + WRITE_ONCE(q->multi_queue.group->sync_pending, false); + xe_guc_ct_wake_waiters(&guc->ct); + + return 0; +} + static void guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, struct xe_guc_submit_exec_queue_snapshot *snapshot) @@ -2805,6 +3364,11 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) if (snapshot->parallel_execution) guc_exec_queue_wq_snapshot_capture(q, snapshot); + if (xe_exec_queue_is_multi_queue(q)) { + snapshot->multi_queue.valid = true; + snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; + snapshot->multi_queue.pos = q->multi_queue.pos; + } spin_lock(&sched->base.job_list_lock); snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, @@ -2887,6 +3451,11 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps if (snapshot->parallel_execution) guc_exec_queue_wq_snapshot_print(snapshot, p); + if (snapshot->multi_queue.valid) { + drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); + drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); + } + for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; i++) drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index b49a2748ec46..4d89b2975fe9 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -21,9 +21,11 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); void xe_guc_submit_pause(struct xe_guc *guc); -void xe_guc_submit_unpause(struct xe_guc *guc); -void xe_guc_submit_unpause_prepare(struct xe_guc *guc); void xe_guc_submit_pause_abort(struct xe_guc *guc); +void xe_guc_submit_pause_vf(struct xe_guc *guc); +void xe_guc_submit_unpause(struct xe_guc *guc); +void xe_guc_submit_unpause_vf(struct xe_guc *guc); +void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); @@ -34,6 +36,9 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, + u32 len); struct xe_guc_submit_exec_queue_snapshot * xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index dc7456c34583..25e29e85502c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -135,6 +135,19 @@ struct 
xe_guc_submit_exec_queue_snapshot { u32 wq[WQ_SIZE / sizeof(u32)]; } parallel; + /** @multi_queue: snapshot of the multi queue information */ + struct { + /** + * @multi_queue.primary: GuC id of the primary exec queue + * of the multi queue group. + */ + u32 primary; + /** @multi_queue.pos: Position of the exec queue within the multi queue group */ + u8 pos; + /** @valid: The exec queue is part of a multi queue group */ + bool valid; + } multi_queue; + /** @pending_list_size: Size of the pending list snapshot array */ int pending_list_size; /** @pending_list: snapshot of the pending list info */ diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index a80175c7c478..6532a88d51e2 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -13,6 +13,7 @@ #include "xe_guc_tlb_inval.h" #include "xe_force_wake.h" #include "xe_mmio.h" +#include "xe_sa.h" #include "xe_tlb_inval.h" #include "regs/xe_guc_regs.h" @@ -34,9 +35,12 @@ static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) G2H_LEN_DW_TLB_INVALIDATE, 1); } -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ +#define MAKE_INVAL_OP_FLUSH(type, flush_cache) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) + (flush_cache ? \ + XE_GUC_TLB_INVAL_FLUSH_CACHE : 0)) + +#define MAKE_INVAL_OP(type) MAKE_INVAL_OP_FLUSH(type, true) static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) { @@ -71,12 +75,11 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) return send_tlb_inval(guc, action, ARRAY_SIZE(action)); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { struct xe_mmio *mmio = >->mmio; - unsigned int fw_ref; if (IS_SRIOV_VF(xe)) return -ECANCELED; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); @@ -86,12 +89,25 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) xe_mmio_write32(mmio, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); } return -ECANCELED; } +static int send_page_reclaim(struct xe_guc *guc, u32 seqno, + u64 gpu_addr) +{ + u32 action[] = { + XE_GUC_ACTION_PAGE_RECLAMATION, + seqno, + lower_32_bits(gpu_addr), + upper_32_bits(gpu_addr), + }; + + return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_PAGE_RECLAMATION, 1); +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. * Note that roundup_pow_of_two() operates on unsigned long, @@ -100,20 +116,21 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, - u64 start, u64 end, u32 asid) + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) { #define MAX_TLB_INVALIDATION_LEN 7 struct xe_guc *guc = tlb_inval->private; struct xe_gt *gt = guc_to_gt(guc); u32 action[MAX_TLB_INVALIDATION_LEN]; u64 length = end - start; - int len = 0; + int len = 0, err; if (guc_to_xe(guc)->info.force_execlist) return -ECANCELED; action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = seqno; + action[len++] = !prl_sa ? 
seqno : TLB_INVALIDATION_SEQNO_INVALID; if (!gt_to_xe(gt)->info.has_range_tlb_inval || length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); @@ -154,7 +171,8 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, ilog2(SZ_2M) + 1))); xe_gt_assert(gt, IS_ALIGNED(start, length)); - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ + action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa); action[len++] = asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); @@ -163,7 +181,10 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - return send_tlb_inval(guc, action, len); + err = send_tlb_inval(guc, action, len); + if (!err && prl_sa) + err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa)); + return err; } static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 0a70c8924582..4212162913af 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -300,19 +300,16 @@ void xe_huc_sanitize(struct xe_huc *huc) void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) { struct xe_gt *gt = huc_to_gt(huc); - unsigned int fw_ref; xe_uc_fw_print(&huc->fw, p); if (!xe_uc_fw_is_enabled(&huc->fw)) return; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; drm_printf(p, "\nHuC status: 0x%08x\n", xe_mmio_read32(>->mmio, HUC_KERNEL_LOAD_INFO)); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index 3a888a40188b..df9c4d79b710 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -37,9 +37,8 @@ static int huc_info(struct seq_file *m, void *data) struct xe_device *xe = huc_to_xe(huc); struct drm_printer p = drm_seq_file_printer(m); - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); xe_huc_print_info(huc, &p); - xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 640950172088..cb45cdceef67 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -43,16 +43,14 @@ static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, { struct xe_device *xe = kobj_to_xe(kobj); struct kobj_attribute *kattr; - ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->show) { - xe_pm_runtime_get(xe); - ret = kattr->show(kobj, kattr, buf); - xe_pm_runtime_put(xe); + guard(xe_pm_runtime)(xe); + return kattr->show(kobj, kattr, buf); } - return ret; + return -EIO; } static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, @@ -62,16 +60,14 @@ static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, { struct xe_device *xe = kobj_to_xe(kobj); struct kobj_attribute *kattr; - ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->store) { - xe_pm_runtime_get(xe); - ret = kattr->store(kobj, kattr, buf, count); - xe_pm_runtime_put(xe); + guard(xe_pm_runtime)(xe); + return kattr->store(kobj, kattr, buf, count); } - return ret; + return -EIO; } static 
const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index fa4db5f23342..f69a32c27458 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -9,7 +9,9 @@ #include "xe_device.h" #include "xe_exec_queue.h" #include "xe_gt.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_group.h" +#include "xe_sync.h" #include "xe_vm.h" static void @@ -20,7 +22,8 @@ hw_engine_group_resume_lr_jobs_func(struct work_struct *w) int err; enum xe_hw_engine_group_execution_mode previous_mode; - err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode, + NULL, 0); if (err) return; @@ -188,23 +191,39 @@ void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group /** * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group * @group: The hw engine group + * @has_deps: dma-fence job triggering suspend has dependencies * * Return: 0 on success, negative error code on error. */ -static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group, + bool has_deps) { int err; struct xe_exec_queue *q; + struct xe_gt *gt = NULL; bool need_resume = false; + ktime_t start = xe_gt_stats_ktime_get(); lockdep_assert_held_write(&group->mode_sem); list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + bool idle_skip_suspend; + if (!xe_vm_in_fault_mode(q->vm)) continue; - need_resume = true; + idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q); + if (!idle_skip_suspend && has_deps) + return -EAGAIN; + + xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1); + if (idle_skip_suspend) + xe_gt_stats_incr(q->gt, + XE_GT_STATS_ID_HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT, 1); + + need_resume |= !idle_skip_suspend; q->ops->suspend(q); + gt = q->gt; } list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { @@ -216,6 +235,12 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group return err; } + if (gt) { + xe_gt_stats_incr(gt, + XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US, + xe_gt_stats_ktime_us_delta(start)); + } + if (need_resume) xe_hw_engine_group_resume_faulting_lr_jobs(group); @@ -236,7 +261,9 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group { long timeout; struct xe_exec_queue *q; + struct xe_gt *gt = NULL; struct dma_fence *fence; + ktime_t start = xe_gt_stats_ktime_get(); lockdep_assert_held_write(&group->mode_sem); @@ -244,18 +271,26 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group if (xe_vm_in_lr_mode(q->vm)) continue; + xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, 1); fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); timeout = dma_fence_wait(fence, false); dma_fence_put(fence); + gt = q->gt; if (timeout < 0) return -ETIME; } + if (gt) { + xe_gt_stats_incr(gt, + XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US, + xe_gt_stats_ktime_us_delta(start)); + } + return 0; } -static int switch_mode(struct xe_hw_engine_group *group) +static int switch_mode(struct xe_hw_engine_group *group, bool has_deps) { int err = 0; enum xe_hw_engine_group_execution_mode new_mode; @@ -265,7 +300,8 @@ static int switch_mode(struct xe_hw_engine_group *group) 
switch (group->cur_mode) { case EXEC_MODE_LR: new_mode = EXEC_MODE_DMA_FENCE; - err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group, + has_deps); break; case EXEC_MODE_DMA_FENCE: new_mode = EXEC_MODE_LR; @@ -281,19 +317,36 @@ static int switch_mode(struct xe_hw_engine_group *group) return 0; } +static int wait_syncs(struct xe_sync_entry *syncs, int num_syncs) +{ + int err, i; + + for (i = 0; i < num_syncs; ++i) { + err = xe_sync_entry_wait(syncs + i); + if (err) + return err; + } + + return 0; +} + /** * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode * @group: The hw engine group * @new_mode: The new execution mode * @previous_mode: Pointer to the previous mode provided for use by caller + * @syncs: Syncs from exec IOCTL + * @num_syncs: Number of syncs from exec IOCTL * * Return: 0 if successful, -EINTR if locking failed. */ int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, enum xe_hw_engine_group_execution_mode new_mode, - enum xe_hw_engine_group_execution_mode *previous_mode) + enum xe_hw_engine_group_execution_mode *previous_mode, + struct xe_sync_entry *syncs, int num_syncs) __acquires(&group->mode_sem) { + bool has_deps = !!num_syncs; int err = down_read_interruptible(&group->mode_sem); if (err) @@ -303,15 +356,25 @@ __acquires(&group->mode_sem) if (new_mode != group->cur_mode) { up_read(&group->mode_sem); +retry: err = down_write_killable(&group->mode_sem); if (err) return err; if (new_mode != group->cur_mode) { - err = switch_mode(group); + err = switch_mode(group, has_deps); if (err) { up_write(&group->mode_sem); - return err; + + if (err != -EAGAIN) + return err; + + err = wait_syncs(syncs, num_syncs); + if (err) + return err; + + has_deps = false; + goto retry; } } downgrade_write(&group->mode_sem); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index 797ee81acbf2..8b17ccd30b70 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -11,6 +11,7 @@ struct drm_device; struct xe_exec_queue; struct xe_gt; +struct xe_sync_entry; int xe_hw_engine_setup_groups(struct xe_gt *gt); @@ -19,7 +20,8 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, enum xe_hw_engine_group_execution_mode new_mode, - enum xe_hw_engine_group_execution_mode *previous_mode); + enum xe_hw_engine_group_execution_mode *previous_mode, + struct xe_sync_entry *syncs, int num_syncs); void xe_hw_engine_group_put(struct xe_hw_engine_group *group); enum xe_hw_engine_group_execution_mode diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 97879daeefc1..ff2aea52ef75 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -502,7 +502,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at int ret = 0; - xe_pm_runtime_get(hwmon->xe); + guard(xe_pm_runtime)(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); @@ -521,8 +521,6 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); @@ -604,7 +602,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) | REG_FIELD_PREP(PWR_LIM_TIME_Y, y); - 
xe_pm_runtime_get(hwmon->xe); + guard(xe_pm_runtime)(hwmon->xe); mutex_lock(&hwmon->hwmon_lock); @@ -616,8 +614,6 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_unlock(&hwmon->hwmon_lock); - xe_pm_runtime_put(hwmon->xe); - return count; } @@ -1124,37 +1120,25 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - xe_pm_runtime_get(hwmon->xe); + guard(xe_pm_runtime)(hwmon->xe); switch (type) { case hwmon_temp: - ret = xe_hwmon_temp_read(hwmon, attr, channel, val); - break; + return xe_hwmon_temp_read(hwmon, attr, channel, val); case hwmon_power: - ret = xe_hwmon_power_read(hwmon, attr, channel, val); - break; + return xe_hwmon_power_read(hwmon, attr, channel, val); case hwmon_curr: - ret = xe_hwmon_curr_read(hwmon, attr, channel, val); - break; + return xe_hwmon_curr_read(hwmon, attr, channel, val); case hwmon_in: - ret = xe_hwmon_in_read(hwmon, attr, channel, val); - break; + return xe_hwmon_in_read(hwmon, attr, channel, val); case hwmon_energy: - ret = xe_hwmon_energy_read(hwmon, attr, channel, val); - break; + return xe_hwmon_energy_read(hwmon, attr, channel, val); case hwmon_fan: - ret = xe_hwmon_fan_read(hwmon, attr, channel, val); - break; + return xe_hwmon_fan_read(hwmon, attr, channel, val); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - - xe_pm_runtime_put(hwmon->xe); - - return ret; } static int @@ -1162,25 +1146,17 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - xe_pm_runtime_get(hwmon->xe); + guard(xe_pm_runtime)(hwmon->xe); switch (type) { case hwmon_power: - ret = xe_hwmon_power_write(hwmon, attr, channel, val); - break; + return xe_hwmon_power_write(hwmon, attr, channel, val); case hwmon_curr: - ret = xe_hwmon_curr_write(hwmon, attr, channel, val); - break; + return xe_hwmon_curr_write(hwmon, attr, channel, val); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - - xe_pm_runtime_put(hwmon->xe); - - return ret; } static int xe_hwmon_read_label(struct device *dev, diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 0b5452be0c87..8eccbae05705 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -319,7 +319,7 @@ int xe_i2c_probe(struct xe_device *xe) struct xe_i2c *i2c; int ret; - if (xe->info.platform != XE_BATTLEMAGE) + if (!xe->info.has_i2c) return 0; if (IS_SRIOV_VF(xe)) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 024e13e606ec..baf5d2c6e802 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -21,6 +21,7 @@ #include "xe_hw_error.h" #include "xe_i2c.h" #include "xe_memirq.h" +#include "xe_mert.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" @@ -525,6 +526,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); xe_i2c_irq_handler(xe, master_ctl); + xe_mert_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 4dc1de482eee..3059ea6525bc 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -8,15 +8,18 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_mert_regs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_tlb_inval.h" #include 
"xe_lmtt.h" #include "xe_map.h" +#include "xe_mert.h" #include "xe_mmio.h" #include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile.h" #include "xe_tile_sriov_printk.h" /** @@ -196,16 +199,22 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) struct xe_device *xe = tile_to_xe(tile); dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); struct xe_gt *gt; + u32 config; u8 id; lmtt_debug(lmtt, "DIR offset %pad\n", &offset); lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); + config = LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K); + for_each_gt_on_tile(gt, tile, id) xe_mmio_write32(>->mmio, GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, - LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); + config); + + if (xe_device_has_mert(xe) && xe_tile_is_root(tile)) + xe_mmio_write32(&tile->mmio, MERT_LMEM_CFG, config); } /** @@ -262,19 +271,29 @@ static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) * @lmtt: the &xe_lmtt to invalidate * * Send requests to all GuCs on this tile to invalidate all TLBs. + * If the platform has a standalone MERT, also invalidate MERT's TLB. * * This function should be called only when running as a PF driver. */ void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) { + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_device *xe = lmtt_to_xe(lmtt); int err; - lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); err = lmtt_invalidate_hw(lmtt); if (err) - xe_tile_sriov_err(lmtt_to_tile(lmtt), "LMTT invalidation failed (%pe)", + xe_tile_sriov_err(tile, "LMTT invalidation failed (%pe)", ERR_PTR(err)); + + if (xe_device_has_mert(xe) && xe_tile_is_root(tile)) { + err = xe_mert_invalidate_lmtt(tile); + if (err) + xe_tile_sriov_err(tile, "MERT LMTT invalidation failed (%pe)", + ERR_PTR(err)); + } } static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b5083c99dd50..70eae7d03a27 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -44,6 +44,11 @@ #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_PRIORITY GENMASK_ULL(10, 9) +#define LRC_PRIORITY_LOW 0 +#define LRC_PRIORITY_NORMAL 1 +#define LRC_PRIORITY_HIGH 2 + /* * Layout of the LRC and associated data allocated as * lrc->bo: @@ -91,13 +96,19 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) return false; } -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) +/** + * xe_gt_lrc_hang_replay_size() - Hang replay size + * @gt: The GT + * @class: Hardware engine class + * + * Determine size of GPU hang replay state for a GT and hardware engine class. 
+ * + * Return: Size of GPU hang replay size + */ +size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); - size_t size; - - /* Per-process HW status page (PPHWSP) */ - size = LRC_PPHWSP_SIZE; + size_t size = 0; /* Engine context image */ switch (class) { @@ -123,11 +134,18 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) size += 1 * SZ_4K; } + return size; +} + +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) +{ + size_t size = xe_gt_lrc_hang_replay_size(gt, class); + /* Add indirect ring state page */ if (xe_gt_has_indirect_ring_state(gt)) size += LRC_INDIRECT_RING_STATE_SIZE; - return size; + return size + LRC_PPHWSP_SIZE; } /* @@ -1386,8 +1404,33 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } +static u8 xe_multi_queue_prio_to_lrc(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + + xe_assert(xe, (priority >= XE_MULTI_QUEUE_PRIORITY_LOW && + priority <= XE_MULTI_QUEUE_PRIORITY_HIGH)); + + /* xe_multi_queue_priority is directly mapped to LRC priority values */ + return priority; +} + +/** + * xe_lrc_set_multi_queue_priority() - Set multi queue priority in LRC + * @lrc: Logical Ring Context + * @priority: Multi queue priority of the exec queue + * + * Convert @priority to LRC multi queue priority and update the @lrc descriptor + */ +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + lrc->desc &= ~LRC_PRIORITY; + lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority)); +} + static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size, u16 msix_vec, + struct xe_vm *vm, void *replay_state, u32 ring_size, + u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; @@ -1402,6 +1445,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, kref_init(&lrc->refcount); lrc->gt = gt; + lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); lrc->size = lrc_size; lrc->flags = 0; lrc->ring.size = ring_size; @@ -1438,11 +1482,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * scratch. */ map = __xe_lrc_pphwsp_map(lrc); - if (gt->default_lrc[hwe->class]) { + if (gt->default_lrc[hwe->class] || replay_state) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, lrc_size - LRC_PPHWSP_SIZE); + if (replay_state) + xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, + replay_state, lrc->replay_size); } else { void *init_data = empty_lrc_data(hwe); @@ -1550,6 +1597,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * xe_lrc_create - Create a LRC * @hwe: Hardware Engine * @vm: The VM (address space) + * @replay_state: GPU hang replay state * @ring_size: LRC ring size * @msix_vec: MSI-X interrupt vector (for platforms that support it) * @flags: LRC initialization flags @@ -1560,7 +1608,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * upon failure. 
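+ *
+ * Minimal usage sketch (hypothetical caller; passing a NULL @replay_state
+ * keeps the existing behaviour of initialising the context from the default
+ * LRC image)::
+ *
+ *	struct xe_lrc *lrc;
+ *
+ *	lrc = xe_lrc_create(hwe, vm, NULL, xe_lrc_ring_size(), msix_vec, flags);
+ *	if (IS_ERR(lrc))
+ *		return PTR_ERR(lrc);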
*/ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec, u32 flags) + void *replay_state, u32 ring_size, u16 msix_vec, u32 flags) { struct xe_lrc *lrc; int err; @@ -1569,7 +1617,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags); + err = xe_lrc_init(lrc, hwe, vm, replay_state, ring_size, msix_vec, flags); if (err) { kfree(lrc); return ERR_PTR(err); @@ -2235,6 +2283,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->size; + snapshot->replay_offset = 0; + snapshot->replay_size = lrc->replay_size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); @@ -2305,6 +2355,9 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer } drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); + drm_printf(p, "\n\t[HWCTX].replay_offset: 0x%lx\n", snapshot->replay_offset); + drm_printf(p, "\n\t[HWCTX].replay_length: 0x%lx\n", snapshot->replay_size); + drm_puts(p, "\t[HWCTX].data: "); for (; i < snapshot->lrc_size; i += sizeof(u32)) { u32 *val = snapshot->lrc_snapshot + i; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 2fb628da5c43..8acf85273c1a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -13,6 +13,7 @@ struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; +enum xe_multi_queue_priority; enum xe_engine_class; struct xe_gt; struct xe_hw_engine; @@ -23,6 +24,7 @@ struct xe_lrc_snapshot { struct xe_bo *lrc_bo; void *lrc_snapshot; unsigned long lrc_size, lrc_offset; + unsigned long replay_size, replay_offset; u32 context_desc; u32 ring_addr; @@ -49,7 +51,7 @@ struct xe_lrc_snapshot { #define XE_LRC_CREATE_USER_CTX BIT(2) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec, u32 flags); + void *replay_state, u32 ring_size, u16 msix_vec, u32 flags); void xe_lrc_destroy(struct kref *ref); /** @@ -86,6 +88,7 @@ static inline size_t xe_lrc_ring_size(void) return SZ_16K; } +size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class); size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); u32 xe_lrc_regs_offset(struct xe_lrc *lrc); @@ -133,6 +136,8 @@ void xe_lrc_dump_default(struct drm_printer *p, u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority); + struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index e9883706e004..a4373d280c39 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,6 +25,9 @@ struct xe_lrc { /** @size: size of the lrc and optional indirect ring state */ u32 size; + /** @replay_size: Size LRC needed for replaying a hang */ + u32 replay_size; + /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; diff --git 
a/drivers/gpu/drm/xe/xe_mert.c b/drivers/gpu/drm/xe/xe_mert.c new file mode 100644 index 000000000000..f7689e922953 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mert.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2025, Intel Corporation. All rights reserved. + */ + +#include "regs/xe_irq_regs.h" +#include "regs/xe_mert_regs.h" + +#include "xe_device.h" +#include "xe_mert.h" +#include "xe_mmio.h" +#include "xe_tile.h" + +/** + * xe_mert_invalidate_lmtt - Invalidate MERT LMTT + * @tile: the &xe_tile + * + * Trigger invalidation of the MERT LMTT and wait for completion. + * + * Return: 0 on success or -ETIMEDOUT in case of a timeout. + */ +int xe_mert_invalidate_lmtt(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_mert *mert = &tile->mert; + const long timeout = HZ / 4; + unsigned long flags; + + xe_assert(xe, xe_device_has_mert(xe)); + xe_assert(xe, xe_tile_is_root(tile)); + + spin_lock_irqsave(&mert->lock, flags); + if (!mert->tlb_inv_triggered) { + mert->tlb_inv_triggered = true; + reinit_completion(&mert->tlb_inv_done); + xe_mmio_write32(&tile->mmio, MERT_TLB_INV_DESC_A, MERT_TLB_INV_DESC_A_VALID); + } + spin_unlock_irqrestore(&mert->lock, flags); + + if (!wait_for_completion_timeout(&mert->tlb_inv_done, timeout)) + return -ETIMEDOUT; + + return 0; +} + +/** + * xe_mert_irq_handler - Handler for MERT interrupts + * @xe: the &xe_device + * @master_ctl: interrupt register + * + * Handle interrupts generated by MERT. + */ +void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + unsigned long flags; + u32 reg_val; + u8 err; + + if (!(master_ctl & SOC_H2DMEMINT_IRQ)) + return; + + reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT); + xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0); + + err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val); + if (err == MERT_TLB_CT_LMTT_FAULT) + drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n", + FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val)); + else if (err) + drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err); + + spin_lock_irqsave(&tile->mert.lock, flags); + if (tile->mert.tlb_inv_triggered) { + reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_INV_DESC_A); + if (!(reg_val & MERT_TLB_INV_DESC_A_VALID)) { + tile->mert.tlb_inv_triggered = false; + complete_all(&tile->mert.tlb_inv_done); + } + } + spin_unlock_irqrestore(&tile->mert.lock, flags); +} diff --git a/drivers/gpu/drm/xe/xe_mert.h b/drivers/gpu/drm/xe/xe_mert.h new file mode 100644 index 000000000000..2e14c5dec008 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mert.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2025, Intel Corporation. All rights reserved. 
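+ *
+ * Usage sketch of the invalidation helper declared below; this mirrors the
+ * PF LMTT invalidation path added to xe_lmtt.c and is shown here only for
+ * illustration:
+ *
+ *	if (xe_device_has_mert(xe) && xe_tile_is_root(tile)) {
+ *		err = xe_mert_invalidate_lmtt(tile);
+ *		if (err)
+ *			xe_tile_sriov_err(tile, "MERT LMTT invalidation failed (%pe)",
+ *					  ERR_PTR(err));
+ *	}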
+ */ + +#ifndef __XE_MERT_H__ +#define __XE_MERT_H__ + +#include +#include +#include + +struct xe_device; +struct xe_tile; + +struct xe_mert { + /** @lock: protects the TLB invalidation status */ + spinlock_t lock; + /** @tlb_inv_triggered: indicates if TLB invalidation was triggered */ + bool tlb_inv_triggered; + /** @tlb_inv_done: completion of TLB invalidation */ + struct completion tlb_inv_done; +}; + +#ifdef CONFIG_PCI_IOV +int xe_mert_invalidate_lmtt(struct xe_tile *tile); +void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl); +#else +static inline void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl) { } +#endif + +#endif /* __XE_MERT_H__ */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2184af413b91..f3b66b55acfb 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,6 +34,7 @@ #include "xe_res_cursor.h" #include "xe_sa.h" #include "xe_sched_job.h" +#include "xe_sriov_vf_ccs.h" #include "xe_sync.h" #include "xe_trace_bo.h" #include "xe_validation.h" @@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, u32 batch_size, batch_size_allocated; struct xe_device *xe = gt_to_xe(gt); struct xe_res_cursor src_it, ccs_it; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_sa_manager *bb_pool; u64 size = xe_bo_size(src_bo); struct xe_bb *bb = NULL; u64 src_L0, src_L0_ofs; u32 src_L0_pt; int err; + ctx = &xe->sriov.vf.ccs.contexts[read_write]; + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), @@ -1141,11 +1146,15 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } + bb_pool = ctx->mem.ccs_bb_pool; + guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); + xe_sa_bo_swap_shadow(bb_pool); + bb = xe_bb_ccs_new(gt, batch_size, read_write); if (IS_ERR(bb)) { drm_err(&xe->drm, "BB allocation failed.\n"); err = PTR_ERR(bb); - goto err_ret; + return err; } batch_size_allocated = batch_size; @@ -1194,10 +1203,52 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, xe_assert(xe, (batch_size_allocated == bb->len)); src_bo->bb_ccs[read_write] = bb; + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + xe_sa_bo_sync_shadow(bb->bo); return 0; +} -err_ret: - return err; +/** + * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer + * content. + * @src_bo: The buffer object whose CCS batch buffer is being cleared. + * @read_write: Selects the CCS read or write BB context. + * + * Directly clearing the BB lacks atomicity and can lead to undefined + * behavior if the vCPU is halted mid-operation during the clearing + * process. To avoid this issue, we use a shadow buffer object approach. + * + * First swap the SA BO address with the shadow BO, perform the clearing + * operation on the BB, update the shadow BO in the ring buffer, then + * sync the shadow and the actual buffer to maintain consistency. + * + * Return: None.
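+ *
+ * Sketch of the sequence as implemented below (illustrative only; locals and
+ * the final xe_bb_free() call are elided)::
+ *
+ *	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
+ *	xe_sa_bo_swap_shadow(bb_pool);
+ *	memset(cs, MI_NOOP, bb->len * sizeof(u32));
+ *	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+ *	xe_sa_bo_sync_shadow(bb->bo);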
+ */ +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write) +{ + struct xe_bb *bb = src_bo->bb_ccs[read_write]; + struct xe_device *xe = xe_bo_device(src_bo); + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_sa_manager *bb_pool; + u32 *cs; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + ctx = &xe->sriov.vf.ccs.contexts[read_write]; + bb_pool = ctx->mem.ccs_bb_pool; + + guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); + xe_sa_bo_swap_shadow(bb_pool); + + cs = xe_sa_bo_cpu_addr(bb->bo); + memset(cs, MI_NOOP, bb->len * sizeof(u32)); + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + + xe_sa_bo_sync_shadow(bb->bo); + + xe_bb_free(bb, NULL); + src_bo->bb_ccs[read_write] = NULL; } /** diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 260e298e5dd7..464c05dde1ba 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, struct xe_bo *src_bo, enum xe_sriov_vf_ccs_rw_ctxs read_write); +void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write); + struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset, diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 6613d3b48a84..0b7225bd77e0 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -811,26 +811,20 @@ int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); enum xe_force_wake_domains domain; struct xe_mocs_info table; - unsigned int fw_ref, flags; - int err = 0; + unsigned int flags; flags = get_mocs_settings(xe, &table); domain = flags & HAS_LNCF_MOCS ? 
XE_FORCEWAKE_ALL : XE_FW_GT; - xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); - if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { - err = -ETIMEDOUT; - goto err_fw; - } + guard(xe_pm_runtime_noresume)(xe); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), domain); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, domain)) + return -ETIMEDOUT; table.ops->dump(&table, flags, gt, p); -err_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); - return err; + return 0; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 33f4ac82fc80..01510061d4d4 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -10,6 +10,7 @@ #include "xe_device_types.h" #include "xe_mmio.h" #include "xe_nvm.h" +#include "xe_pcode_api.h" #include "regs/xe_gsc_regs.h" #include "xe_sriov.h" @@ -45,39 +46,50 @@ static bool xe_nvm_non_posted_erase(struct xe_device *xe) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - if (xe->info.platform != XE_BATTLEMAGE) + switch (xe->info.platform) { + case XE_CRESCENTISLAND: + case XE_BATTLEMAGE: + return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); + default: return false; - return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & - NVM_NON_POSTED_ERASE_CHICKEN_BIT); + } } static bool xe_nvm_writable_override(struct xe_device *xe) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); bool writable_override; - resource_size_t base; + struct xe_reg reg; + u32 test_bit; switch (xe->info.platform) { + case XE_CRESCENTISLAND: + reg = PCODE_SCRATCH(0); + test_bit = FDO_MODE; + break; case XE_BATTLEMAGE: - base = DG2_GSC_HECI2_BASE; + reg = HECI_FWSTS2(DG2_GSC_HECI2_BASE); + test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE; break; case XE_PVC: - base = PVC_GSC_HECI2_BASE; + reg = HECI_FWSTS2(PVC_GSC_HECI2_BASE); + test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE; break; case XE_DG2: - base = DG2_GSC_HECI2_BASE; + reg = HECI_FWSTS2(DG2_GSC_HECI2_BASE); + test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE; break; case XE_DG1: - base = DG1_GSC_HECI2_BASE; + reg = HECI_FWSTS2(DG1_GSC_HECI2_BASE); + test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE; break; default: drm_err(&xe->drm, "Unknown platform\n"); return true; } - writable_override = - !(xe_mmio_read32(mmio, HECI_FWSTS2(base)) & - HECI_FW_STATUS_2_NVM_ACCESS_MODE); + writable_override = !(xe_mmio_read32(mmio, reg) & test_bit); if (writable_override) drm_info(&xe->drm, "NVM access overridden by jumper\n"); return writable_override; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index f8bb28ab8124..abf87fe0b345 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1941,6 +1941,7 @@ static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: case DRM_XE_OA_UNIT_TYPE_OAM_SAG: + case DRM_XE_OA_UNIT_TYPE_MERT: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -1966,10 +1967,6 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) enum xe_hw_engine_id id; int ret = 0; - /* If not provided, OA unit defaults to OA unit 0 as per uapi */ - if (!param->oa_unit) - param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; - /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { param->hwe = 
xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, @@ -2035,7 +2032,15 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (ret) return ret; + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ + if (!param.oa_unit) + param.oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; + if (param.exec_queue_id > 0) { + /* An exec_queue is only needed for OAR/OAC functionality on OAG */ + if (XE_IOCTL_DBG(oa->xe, param.oa_unit->type != DRM_XE_OA_UNIT_TYPE_OAG)) + return -EINVAL; + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; @@ -2224,6 +2229,8 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + { .start = 0x145194, .end = 0x145194 }, /* SYS_MEM_LAT_MEASURE */ + { .start = 0x145340, .end = 0x14537C }, /* MERTSS_PES_0 - MERTSS_PES_7 */ {}, }; @@ -2515,7 +2522,12 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) - return 1; + /* + * Mert OA unit belongs to the SoC, not a gt, so should be accessed using + * xe_root_tile_mmio(). However, for all known platforms this is the same as + * accessing via xe_root_mmio_gt()->mmio. + */ + return xe_device_has_mert(gt_to_xe(gt)) ? 2 : 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ else @@ -2570,40 +2582,57 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) static struct xe_oa_regs __oam_regs(u32 base) { return (struct xe_oa_regs) { - base, - OAM_HEAD_POINTER(base), - OAM_TAIL_POINTER(base), - OAM_BUFFER(base), - OAM_CONTEXT_CONTROL(base), - OAM_CONTROL(base), - OAM_DEBUG(base), - OAM_STATUS(base), - OAM_CONTROL_COUNTER_SEL_MASK, + .base = base, + .oa_head_ptr = OAM_HEAD_POINTER(base), + .oa_tail_ptr = OAM_TAIL_POINTER(base), + .oa_buffer = OAM_BUFFER(base), + .oa_ctx_ctrl = OAM_CONTEXT_CONTROL(base), + .oa_ctrl = OAM_CONTROL(base), + .oa_debug = OAM_DEBUG(base), + .oa_status = OAM_STATUS(base), + .oa_mmio_trg = OAM_MMIO_TRG(base), + .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK, }; } static struct xe_oa_regs __oag_regs(void) { return (struct xe_oa_regs) { - 0, - OAG_OAHEADPTR, - OAG_OATAILPTR, - OAG_OABUFFER, - OAG_OAGLBCTXCTRL, - OAG_OACONTROL, - OAG_OA_DEBUG, - OAG_OASTATUS, - OAG_OACONTROL_OA_COUNTER_SEL_MASK, + .base = 0, + .oa_head_ptr = OAG_OAHEADPTR, + .oa_tail_ptr = OAG_OATAILPTR, + .oa_buffer = OAG_OABUFFER, + .oa_ctx_ctrl = OAG_OAGLBCTXCTRL, + .oa_ctrl = OAG_OACONTROL, + .oa_debug = OAG_OA_DEBUG, + .oa_status = OAG_OASTATUS, + .oa_mmio_trg = OAG_MMIOTRIGGER, + .oa_ctrl_counter_select_mask = OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oamert_regs(void) +{ + return (struct xe_oa_regs) { + .base = 0, + .oa_head_ptr = OAMERT_HEAD_POINTER, + .oa_tail_ptr = OAMERT_TAIL_POINTER, + .oa_buffer = OAMERT_BUFFER, + .oa_ctx_ctrl = OAMERT_CONTEXT_CONTROL, + .oa_ctrl = OAMERT_CONTROL, + .oa_debug = OAMERT_DEBUG, + .oa_status = OAMERT_STATUS, + .oa_mmio_trg = OAMERT_MMIO_TRG, + .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK, }; } static void __xe_oa_init_oa_units(struct xe_gt *gt) { - /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ const u32 oam_base_addr[] = { - [XE_OAM_UNIT_SAG] = 0x13000, - [XE_OAM_UNIT_SCMI_0] = 
0x14000, - [XE_OAM_UNIT_SCMI_1] = 0x14800, + [XE_OAM_UNIT_SAG] = XE_OAM_SAG_BASE, + [XE_OAM_UNIT_SCMI_0] = XE_OAM_SCMI_0_BASE, + [XE_OAM_UNIT_SCMI_1] = XE_OAM_SCMI_1_BASE, }; int i, num_units = gt->oa.num_oa_units; @@ -2611,8 +2640,15 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) struct xe_oa_unit *u = &gt->oa.oa_unit[i]; if (xe_gt_is_main_type(gt)) { - u->regs = __oag_regs(); - u->type = DRM_XE_OA_UNIT_TYPE_OAG; + if (!i) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else { + xe_gt_assert(gt, xe_device_has_mert(gt_to_xe(gt))); + xe_gt_assert(gt, gt == xe_root_mmio_gt(gt_to_xe(gt))); + u->regs = __oamert_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_MERT; + } } else { xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); u->regs = __oam_regs(oam_base_addr[i]); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index cf080f412189..08cc8d7c2215 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -87,6 +87,7 @@ struct xe_oa_regs { struct xe_reg oa_ctrl; struct xe_reg oa_debug; struct xe_reg oa_status; + struct xe_reg oa_mmio_trg; u32 oa_ctrl_counter_select_mask; }; diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c new file mode 100644 index 000000000000..fd8c33761127 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_page_reclaim.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include +#include +#include +#include + +#include "xe_page_reclaim.h" + +#include "regs/xe_gt_regs.h" +#include "xe_assert.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_pat.h" +#include "xe_sa.h" +#include "xe_tlb_inval_types.h" +#include "xe_vm.h" + +/** + * xe_page_reclaim_skip() - Decide whether PRL should be skipped for a VMA + * @tile: Tile owning the VMA + * @vma: VMA under consideration + * + * PPC flushing may be handled by HW for specific PAT encodings. + * Skip PPC flushing/Page Reclaim in the scenarios below, where an extra + * flush would be redundant: + * - pat_index is transient display (1) + * + * Return: true when page reclamation is unnecessary, false otherwise. + */ +bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma) +{ + u8 l3_policy; + + l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index); + + /* + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * Transient display flushes (l3_policy = 1) are taken care of by HW. + * + * HW will sequence these transient flushes at various sync points, so + * any page reclamation event will hit these sync points before the + * reclamation could execute. + */ + return (l3_policy == XE_L3_POLICY_XD); +} + +/** + * xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO + * @tlb_inval: TLB invalidation frontend associated with the request + * @prl: page reclaim list data that the BO contents are copied from + * @fence: TLB invalidation fence that the page reclaim action is paired to + * + * Suballocates a 4K BO out of the tile reclaim pool, copies the CPU-side PRL + * into the BO and queues the buffer for release when @fence signals. + * + * Return: struct drm_suballoc pointer on success or ERR_PTR on failure.
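+ *
+ * Hypothetical caller sketch (names are illustrative, error handling kept
+ * minimal)::
+ *
+ *	struct drm_suballoc *prl_sa;
+ *
+ *	prl_sa = xe_page_reclaim_create_prl_bo(tlb_inval, prl, fence);
+ *	if (IS_ERR(prl_sa))
+ *		return PTR_ERR(prl_sa);
+ *
+ * No explicit free is needed; the suballocation is queued for release once
+ * @fence signals.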
+ */ +struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval, + struct xe_page_reclaim_list *prl, + struct xe_tlb_inval_fence *fence) +{ + struct xe_gt *gt = container_of(tlb_inval, struct xe_gt, tlb_inval); + struct xe_tile *tile = gt_to_tile(gt); + /* (+1) for NULL page_reclaim_entry to indicate end of list */ + int prl_size = min(prl->num_entries + 1, XE_PAGE_RECLAIM_MAX_ENTRIES) * + sizeof(struct xe_guc_page_reclaim_entry); + struct drm_suballoc *prl_sa; + + /* Maximum size of PRL is 1 4K-page */ + prl_sa = __xe_sa_bo_new(tile->mem.reclaim_pool, + prl_size, GFP_ATOMIC); + if (IS_ERR(prl_sa)) + return prl_sa; + + memcpy(xe_sa_bo_cpu_addr(prl_sa), prl->entries, + prl_size); + xe_sa_bo_flush_write(prl_sa); + /* Queue up sa_bo_free on tlb invalidation fence signal */ + xe_sa_bo_free(prl_sa, &fence->base); + + return prl_sa; +} + +/** + * xe_page_reclaim_list_invalidate() - Mark a PRL as invalid + * @prl: Page reclaim list to reset + * + * Clears the entries pointer and marks the list as invalid so that + * future users know the PRL is unusable. It is expected that the entries + * have already been released. + */ +void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl) +{ + xe_page_reclaim_entries_put(prl->entries); + prl->entries = NULL; + prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST; +} + +/** + * xe_page_reclaim_list_init() - Initialize a page reclaim list + * @prl: Page reclaim list to initialize + * + * Clears both fields so the list starts out empty and ready for use. + */ +void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl) +{ + prl->entries = NULL; + prl->num_entries = 0; +} + +/** + * xe_page_reclaim_list_alloc_entries() - Allocate page reclaim list entries + * @prl: Page reclaim list to allocate entries for + * + * Allocate one 4K page for the PRL entries; on failure, prl->entries is + * left NULL. + * + * Return: 0 on success, -ENOMEM if the page allocation fails. + */ +int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl) +{ + struct page *page; + + if (XE_WARN_ON(prl->entries)) + return 0; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (page) { + prl->entries = page_address(page); + prl->num_entries = 0; + } + + return page ? 
0 : -ENOMEM; +} diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h new file mode 100644 index 000000000000..a4f58e0ce9b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_page_reclaim.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGE_RECLAIM_H_ +#define _XE_PAGE_RECLAIM_H_ + +#include +#include +#include +#include +#include +#include + +#define XE_PAGE_RECLAIM_MAX_ENTRIES 512 +#define XE_PAGE_RECLAIM_LIST_MAX_SIZE SZ_4K + +struct xe_tlb_inval; +struct xe_tlb_inval_fence; +struct xe_tile; +struct xe_vma; + +struct xe_guc_page_reclaim_entry { + u64 qw; +/* valid reclaim entry bit */ +#define XE_PAGE_RECLAIM_VALID BIT_ULL(0) +/* + * offset order of page size to be reclaimed + * page_size = 1 << (XE_PTE_SHIFT + reclamation_size) + */ +#define XE_PAGE_RECLAIM_SIZE GENMASK_ULL(6, 1) +#define XE_PAGE_RECLAIM_RSVD_0 GENMASK_ULL(11, 7) +/* lower 20 bits of the physical address */ +#define XE_PAGE_RECLAIM_ADDR_LO GENMASK_ULL(31, 12) +/* upper 20 bits of the physical address */ +#define XE_PAGE_RECLAIM_ADDR_HI GENMASK_ULL(51, 32) +#define XE_PAGE_RECLAIM_RSVD_1 GENMASK_ULL(63, 52) +} __packed; + +struct xe_page_reclaim_list { + /** @entries: array of page reclaim entries, backed by a single page allocation */ + struct xe_guc_page_reclaim_entry *entries; + /** @num_entries: number of entries */ + int num_entries; +#define XE_PAGE_RECLAIM_INVALID_LIST -1 +}; + +/** + * xe_page_reclaim_list_is_new() - Check if PRL is a new allocation + * @prl: Pointer to page reclaim list + * + * A PRL that has not been allocated yet has a NULL entries pointer and + * zero num_entries. + */ +static inline bool xe_page_reclaim_list_is_new(struct xe_page_reclaim_list *prl) +{ + return !prl->entries && prl->num_entries == 0; +} + +/** + * xe_page_reclaim_list_valid() - Check if the page reclaim list is valid + * @prl: Pointer to page reclaim list + * + * A PRL whose num_entries is XE_PAGE_RECLAIM_INVALID_LIST has been + * invalidated and is unusable. + */ +static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl) +{ + return !xe_page_reclaim_list_is_new(prl) && + prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST; +} + +bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma); +struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval, + struct xe_page_reclaim_list *prl, + struct xe_tlb_inval_fence *fence); +void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl); +void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl); +int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl); +/** + * xe_page_reclaim_entries_get() - Increment the reference count of page reclaim entries. + * @entries: Pointer to the array of page reclaim entries. + * + * This function increments the reference count of the backing page. + */ +static inline void xe_page_reclaim_entries_get(struct xe_guc_page_reclaim_entry *entries) +{ + if (entries) + get_page(virt_to_page(entries)); +} + +/** + * xe_page_reclaim_entries_put() - Decrement the reference count of page reclaim entries. + * @entries: Pointer to the array of page reclaim entries. + * + * This function decrements the reference count of the backing page + * and frees it if the count reaches zero.
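+ *
+ * Illustrative pairing with the allocation side (hypothetical flow): the page
+ * reference taken by xe_page_reclaim_list_alloc_entries() is dropped again
+ * when xe_page_reclaim_list_invalidate() ends up calling this helper::
+ *
+ *	xe_page_reclaim_list_init(&prl);
+ *	if (xe_page_reclaim_list_alloc_entries(&prl))
+ *		return -ENOMEM;
+ *	... fill prl.entries and bump prl.num_entries ...
+ *	xe_page_reclaim_list_invalidate(&prl);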
+ */ +static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry *entries) +{ + if (entries) + put_page(virt_to_page(entries)); +} + +#endif /* _XE_PAGE_RECLAIM_H_ */ diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index afb06598b6e1..6bee53d6ffc3 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -223,22 +223,22 @@ static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, static void xe_pagefault_print(struct xe_pagefault *pf) { - xe_gt_dbg(pf->gt, "\n\tASID: %d\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->consumer.asid, - upper_32_bits(pf->consumer.page_addr), - lower_32_bits(pf->consumer.page_addr), - pf->consumer.fault_type, - pf->consumer.access_type, - pf->consumer.fault_level, - pf->consumer.engine_class, - xe_hw_engine_class_to_str(pf->consumer.engine_class), - pf->consumer.engine_instance); + xe_gt_info(pf->gt, "\n\tASID: %d\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->consumer.asid, + upper_32_bits(pf->consumer.page_addr), + lower_32_bits(pf->consumer.page_addr), + pf->consumer.fault_type, + pf->consumer.access_type, + pf->consumer.fault_level, + pf->consumer.engine_class, + xe_hw_engine_class_to_str(pf->consumer.engine_class), + pf->consumer.engine_instance); } static void xe_pagefault_queue_work(struct work_struct *w) @@ -260,8 +260,8 @@ static void xe_pagefault_queue_work(struct work_struct *w) err = xe_pagefault_service(&pf); if (err) { xe_pagefault_print(&pf); - xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n", - ERR_PTR(err)); + xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); } pf.producer.ops->ack_fault(&pf, err); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 68171cceea18..2c3375e0250b 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -9,6 +9,7 @@ #include +#include "regs/xe_gt_regs.h" #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -50,8 +51,37 @@ #define XELP_PAT_WC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1) #define XELP_PAT_UC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0) +#define PAT_LABEL_LEN 20 + static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" }; +static void xe_pat_index_label(char *label, size_t len, int index) +{ + snprintf(label, len, "PAT[%2d] ", index); +} + +static void xelp_pat_entry_dump(struct drm_printer *p, int index, u32 pat) +{ + u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); + + drm_printf(p, "PAT[%2d] = %s (%#8x)\n", index, + XELP_MEM_TYPE_STR_MAP[mem_type], pat); +} + +static void xehpc_pat_entry_dump(struct drm_printer *p, int index, u32 pat) +{ + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", index, + REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat), + REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); +} + +static void xelpg_pat_entry_dump(struct drm_printer *p, int index, u32 pat) +{ + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", index, + REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat), + REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); +} + struct xe_pat_ops { void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries); @@ -196,6 +226,19 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) return 
xe->pat.table[pat_index].coh_mode; } +bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index) +{ + WARN_ON(pat_index >= xe->pat.n_entries); + return !!(xe->pat.table[pat_index].value & XE2_COMP_EN); +} + +u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index) +{ + WARN_ON(pat_index >= xe->pat.n_entries); + + return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value); +} + static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries) { @@ -233,24 +276,20 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta static int xelp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; int i; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); for (i = 0; i < xe->pat.n_entries; i++) { u32 pat = xe_mmio_read32(>->mmio, XE_REG(_PAT_INDEX(i))); - u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); - drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, - XELP_MEM_TYPE_STR_MAP[mem_type], pat); + xelp_pat_entry_dump(p, i, pat); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -262,26 +301,20 @@ static const struct xe_pat_ops xelp_pat_ops = { static int xehp_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; int i; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); for (i = 0; i < xe->pat.n_entries; i++) { u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - u8 mem_type; - mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); - - drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, - XELP_MEM_TYPE_STR_MAP[mem_type], pat); + xelp_pat_entry_dump(p, i, pat); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -293,11 +326,10 @@ static const struct xe_pat_ops xehp_pat_ops = { static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; int i; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -305,12 +337,9 @@ static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p) for (i = 0; i < xe->pat.n_entries; i++) { u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, - REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat), - REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); + xehpc_pat_entry_dump(p, i, pat); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -322,11 +351,10 @@ static const struct xe_pat_ops xehpc_pat_ops = { static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; int i; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table:\n"); @@ -339,12 +367,9 @@ static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p) else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, - REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat), - 
REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); + xelpg_pat_entry_dump(p, i, pat); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -358,15 +383,38 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; +static void xe2_pat_entry_dump(struct drm_printer *p, const char *label, u32 pat, bool rsvd) +{ + drm_printf(p, "%s= [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", label, + !!(pat & XE2_NO_PROMOTE), + !!(pat & XE2_COMP_EN), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat, rsvd ? " *" : ""); +} + +static void xe3p_xpc_pat_entry_dump(struct drm_printer *p, const char *label, u32 pat, bool rsvd) +{ + drm_printf(p, "%s= [ %u, %u, %u, %u, %u ] (%#8x)%s\n", label, + !!(pat & XE2_NO_PROMOTE), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat, rsvd ? " *" : ""); +} + static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; u32 pat; int i; + char label[PAT_LABEL_LEN]; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table: (* = reserved entry)\n"); @@ -377,14 +425,8 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i, - !!(pat & XE2_NO_PROMOTE), - !!(pat & XE2_COMP_EN), - REG_FIELD_GET(XE2_L3_CLOS, pat), - REG_FIELD_GET(XE2_L3_POLICY, pat), - REG_FIELD_GET(XE2_L4_POLICY, pat), - REG_FIELD_GET(XE2_COH_MODE, pat), - pat, xe->pat.table[i].valid ? "" : " *"); + xe_pat_index_label(label, sizeof(label), i); + xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); } /* @@ -397,16 +439,8 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); drm_printf(p, "Page Table Access:\n"); - drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", - !!(pat & XE2_NO_PROMOTE), - !!(pat & XE2_COMP_EN), - REG_FIELD_GET(XE2_L3_CLOS, pat), - REG_FIELD_GET(XE2_L3_POLICY, pat), - REG_FIELD_GET(XE2_L4_POLICY, pat), - REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + xe2_pat_entry_dump(p, "PTA_MODE", pat, false); - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -419,12 +453,12 @@ static const struct xe_pat_ops xe2_pat_ops = { static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; u32 pat; int i; + char label[PAT_LABEL_LEN]; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; drm_printf(p, "PAT table: (* = reserved entry)\n"); @@ -432,13 +466,8 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) for (i = 0; i < xe->pat.n_entries; i++) { pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i, - !!(pat & XE2_NO_PROMOTE), - REG_FIELD_GET(XE2_L3_CLOS, pat), - REG_FIELD_GET(XE2_L3_POLICY, pat), - REG_FIELD_GET(XE2_L4_POLICY, pat), - REG_FIELD_GET(XE2_COH_MODE, pat), - pat, xe->pat.table[i].valid ? 
"" : " *"); + xe_pat_index_label(label, sizeof(label), i); + xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); } /* @@ -448,15 +477,8 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); drm_printf(p, "Page Table Access:\n"); - drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u ] (%#8x)\n", - !!(pat & XE2_NO_PROMOTE), - REG_FIELD_GET(XE2_L3_CLOS, pat), - REG_FIELD_GET(XE2_L3_POLICY, pat), - REG_FIELD_GET(XE2_L4_POLICY, pat), - REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + xe3p_xpc_pat_entry_dump(p, "PTA_MODE", pat, false); - xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; } @@ -578,3 +600,65 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) return xe->pat.ops->dump(gt, p); } + +/** + * xe_pat_dump_sw_config() - Dump the software-configured GT PAT table into a drm printer. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + char label[PAT_LABEL_LEN]; + + if (!xe->pat.table || !xe->pat.n_entries) + return -EOPNOTSUPP; + + drm_printf(p, "PAT table:%s\n", GRAPHICS_VER(xe) >= 20 ? " (* = reserved entry)" : ""); + for (u32 i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe->pat.table[i].value; + + if (GRAPHICS_VERx100(xe) == 3511) { + xe_pat_index_label(label, sizeof(label), i); + xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { + xe_pat_index_label(label, sizeof(label), i); + xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + } else if (xe->info.platform == XE_METEORLAKE) { + xelpg_pat_entry_dump(p, i, pat); + } else if (xe->info.platform == XE_PVC) { + xehpc_pat_entry_dump(p, i, pat); + } else if (xe->info.platform == XE_DG2 || GRAPHICS_VERx100(xe) <= 1210) { + xelp_pat_entry_dump(p, i, pat); + } else { + return -EOPNOTSUPP; + } + } + + if (xe->pat.pat_pta) { + u32 pat = xe->pat.pat_pta->value; + + drm_printf(p, "Page Table Access:\n"); + xe2_pat_entry_dump(p, "PTA_MODE", pat, false); + } + + if (xe->pat.pat_ats) { + u32 pat = xe->pat.pat_ats->value; + + drm_printf(p, "PCIe ATS/PASID:\n"); + xe2_pat_entry_dump(p, "PAT_ATS ", pat, false); + } + + drm_printf(p, "Cache Level:\n"); + drm_printf(p, "IDX[XE_CACHE_NONE] = %d\n", xe->pat.idx[XE_CACHE_NONE]); + drm_printf(p, "IDX[XE_CACHE_WT] = %d\n", xe->pat.idx[XE_CACHE_WT]); + drm_printf(p, "IDX[XE_CACHE_WB] = %d\n", xe->pat.idx[XE_CACHE_WB]); + if (GRAPHICS_VER(xe) >= 20) { + drm_printf(p, "IDX[XE_CACHE_NONE_COMPRESSION] = %d\n", + xe->pat.idx[XE_CACHE_NONE_COMPRESSION]); + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 05dae03a5f54..d5dadfb7f924 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -49,6 +49,7 @@ void xe_pat_init_early(struct xe_device *xe); void xe_pat_init(struct xe_gt *gt); int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); +int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p); /** * xe_pat_index_get_coh_mode - Extract the coherency mode for the given @@ -58,4 +59,24 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); */ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index); +/** + * xe_pat_index_get_comp_en - Extract the compression enable flag for + * the given pat_index. 
+ * @xe: xe device + * @pat_index: The pat_index to query + * + * Return: true if compression is enabled for this pat_index, false otherwise. + */ +bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index); + +#define XE_L3_POLICY_WB 0 /* Write-back */ +#define XE_L3_POLICY_XD 1 /* WB - Transient Display */ +#define XE_L3_POLICY_UC 3 /* Uncached */ +/** + * xe_pat_index_get_l3_policy - Extract the L3 policy for the given pat_index. + * @xe: xe device + * @pat_index: The pat_index to query + */ +u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index); + #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9c9ea10d994c..18d4e6b5c319 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -108,6 +108,7 @@ static const struct xe_graphics_desc graphics_xe2 = { static const struct xe_graphics_desc graphics_xe3p_xpc = { XE2_GFX_FEATURES, + .has_indirect_ring_state = 1, .hw_engine_mask = GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) | GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), @@ -168,6 +169,7 @@ static const struct xe_device_desc tgl_desc = { .pre_gmdid_media_ip = &media_ip_xem, PLATFORM(TIGERLAKE), .dma_mask_size = 39, + .has_cached_pt = true, .has_display = true, .has_llc = true, .has_sriov = true, @@ -182,6 +184,7 @@ static const struct xe_device_desc rkl_desc = { .pre_gmdid_media_ip = &media_ip_xem, PLATFORM(ROCKETLAKE), .dma_mask_size = 39, + .has_cached_pt = true, .has_display = true, .has_llc = true, .max_gt_per_tile = 1, @@ -197,6 +200,7 @@ static const struct xe_device_desc adl_s_desc = { .pre_gmdid_media_ip = &media_ip_xem, PLATFORM(ALDERLAKE_S), .dma_mask_size = 39, + .has_cached_pt = true, .has_display = true, .has_llc = true, .has_sriov = true, @@ -217,6 +221,7 @@ static const struct xe_device_desc adl_p_desc = { .pre_gmdid_media_ip = &media_ip_xem, PLATFORM(ALDERLAKE_P), .dma_mask_size = 39, + .has_cached_pt = true, .has_display = true, .has_llc = true, .has_sriov = true, @@ -235,6 +240,7 @@ static const struct xe_device_desc adl_n_desc = { .pre_gmdid_media_ip = &media_ip_xem, PLATFORM(ALDERLAKE_N), .dma_mask_size = 39, + .has_cached_pt = true, .has_display = true, .has_llc = true, .has_sriov = true, @@ -361,7 +367,9 @@ static const struct xe_device_desc bmg_desc = { .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_i2c = true, .has_late_bind = true, + .has_pre_prod_wa = 1, .has_sriov = true, .has_mem_copy_instr = true, .max_gt_per_tile = 2, @@ -381,6 +389,7 @@ static const struct xe_device_desc ptl_desc = { .has_flat_ccs = 1, .has_sriov = true, .has_mem_copy_instr = true, + .has_pre_prod_wa = 1, .max_gt_per_tile = 2, .needs_scratch = true, .needs_shared_vf_gt_wq = true, @@ -394,6 +403,7 @@ static const struct xe_device_desc nvls_desc = { .has_display = true, .has_flat_ccs = 1, .has_mem_copy_instr = true, + .has_pre_prod_wa = 1, .max_gt_per_tile = 2, .require_force_probe = true, .va_bits = 48, @@ -406,7 +416,11 @@ static const struct xe_device_desc cri_desc = { .dma_mask_size = 52, .has_display = false, .has_flat_ccs = false, + .has_gsc_nvm = 1, + .has_i2c = true, .has_mbx_power_limits = true, + .has_mert = true, + .has_pre_prod_wa = 1, .has_sriov = true, .max_gt_per_tile = 2, .require_force_probe = true, @@ -663,6 +677,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.vram_flags = desc->vram_flags; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_cached_pt = desc->has_cached_pt; xe->info.has_fan_control = desc->has_fan_control; /* runtime fusing may force flat_ccs 
to disabled later */ xe->info.has_flat_ccs = desc->has_flat_ccs; @@ -670,8 +685,12 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; + xe->info.has_i2c = desc->has_i2c; xe->info.has_late_bind = desc->has_late_bind; xe->info.has_llc = desc->has_llc; + xe->info.has_mert = desc->has_mert; + xe->info.has_page_reclaim_hw_assist = desc->has_page_reclaim_hw_assist; + xe->info.has_pre_prod_wa = desc->has_pre_prod_wa; xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && desc->has_sriov; @@ -755,6 +774,7 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, gt->info.type = XE_GT_TYPE_MAIN; gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; + gt->info.multi_queue_engine_class_mask = graphics_desc->multi_queue_engine_class_mask; gt->info.engine_mask = graphics_desc->hw_engine_mask; /* @@ -1153,6 +1173,15 @@ static int xe_pci_runtime_suspend(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); int err; + /* + * We hold an additional reference to the runtime PM to keep PF in D0 + * during VFs lifetime, as our VFs do not implement the PM capability. + * This means we should never be runtime suspending as long as VFs are + * enabled. + */ + xe_assert(xe, !IS_SRIOV_VF(xe)); + xe_assert(xe, !pci_num_vf(pdev)); + err = xe_pm_runtime_suspend(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 9ff69c4843b0..3fd22034f03e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -219,7 +219,6 @@ static int pf_disable_vfs(struct xe_device *xe) int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct xe_device *xe = pdev_to_xe_device(pdev); - int ret; if (!IS_SRIOV_PF(xe)) return -ENODEV; @@ -233,14 +232,11 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs && pci_num_vf(pdev)) return -EBUSY; - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); if (num_vfs > 0) - ret = pf_enable_vfs(xe, num_vfs); + return pf_enable_vfs(xe, num_vfs); else - ret = pf_disable_vfs(xe); - xe_pm_runtime_put(xe); - - return ret; + return pf_disable_vfs(xe); } /** diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 9892c063a9c5..3bb51d155951 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -37,16 +37,21 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; + u8 has_cached_pt:1; u8 has_display:1; u8 has_fan_control:1; u8 has_flat_ccs:1; u8 has_gsc_nvm:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; + u8 has_i2c:1; u8 has_late_bind:1; u8 has_llc:1; u8 has_mbx_power_limits:1; u8 has_mem_copy_instr:1; + u8 has_mert:1; + u8 has_pre_prod_wa:1; + u8 has_page_reclaim_hw_assist:1; u8 has_pxp:1; u8 has_sriov:1; u8 needs_scratch:1; @@ -58,6 +63,7 @@ struct xe_device_desc { struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ + u16 multi_queue_engine_class_mask; /* bitmask of engine classes which support multi queue */ u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 70dcd6625680..975892d6b230 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -77,11 +77,13 @@ #define 
PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) /* PCODE_SCRATCH0 */ +#define BREADCRUMB_VERSION REG_GENMASK(31, 29) #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) #define OVERFLOW_REG_OFFSET REG_GENMASK(14, 12) #define HISTORY_TRACKING REG_BIT(11) #define OVERFLOW_SUPPORT REG_BIT(10) #define AUXINFO_SUPPORT REG_BIT(9) +#define FDO_MODE REG_BIT(4) #define BOOT_STATUS REG_GENMASK(3, 1) #define CRITICAL_FAILURE 4 #define NON_CRITICAL_FAILURE 7 diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 766922530265..4390ba69610d 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -591,7 +591,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) } for_each_gt(gt, xe, id) { - err = xe_gt_suspend(gt); + err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt); if (err) goto out_resume; } @@ -633,10 +633,10 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_rpm_lockmap_acquire(xe); - for_each_gt(gt, xe, id) - xe_gt_idle_disable_c6(gt); - if (xe->d3cold.allowed) { + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + err = xe_pcode_ready(xe, true); if (err) goto out; @@ -657,7 +657,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_irq_resume(xe); for_each_gt(gt, xe, id) - xe_gt_resume(gt); + xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt); xe_display_pm_runtime_resume(xe); diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index c63335eb69e5..0b20059dd7b3 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -425,7 +425,7 @@ static ssize_t event_attr_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(buf, "event=%#04llx\n", pmu_attr->id); + return sysfs_emit(buf, "event=%#04llx\n", pmu_attr->id); } #define XE_EVENT_ATTR(name_, v_, id_) \ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 884127b4d97d..6cd78bb2b652 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -12,6 +12,7 @@ #include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_migrate.h" +#include "xe_page_reclaim.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" #include "xe_res_cursor.h" @@ -1535,6 +1536,9 @@ struct xe_pt_stage_unbind_walk { /** @modified_end: Walk range start, modified like @modified_start. 
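/*
 * Illustrative sketch, not part of the patch: the xe_pm.c runtime hooks
 * above now select between the full GT suspend/resume path and the lighter
 * xe_gt_runtime_suspend()/xe_gt_runtime_resume() variants based on
 * d3cold.allowed -- presumably because only D3cold actually removes power
 * from the device. A hypothetical helper spelling out the same decision:
 */
static int example_gt_rpm_suspend(struct xe_device *xe, struct xe_gt *gt)
{
	/* Full save path only when the device may really lose power (D3cold) */
	return xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
}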
*/ u64 modified_end; + /** @prl: Backing pointer to page reclaim list in pt_update_ops */ + struct xe_page_reclaim_list *prl; + /* Output */ /* @wupd: Structure to track the page-table updates we're building */ struct xe_walk_update wupd; @@ -1572,6 +1576,68 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, return false; } +/* Huge 2MB leaf lives directly in a level-1 table and has no children */ +static bool is_2m_pte(struct xe_pt *pte) +{ + return pte->level == 1 && !pte->base.children; +} + +/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */ +#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(page_size)); \ + ilog2(page_size) - XE_PTE_SHIFT; \ +}) + +static int generate_reclaim_entry(struct xe_tile *tile, + struct xe_page_reclaim_list *prl, + u64 pte, struct xe_pt *xe_child) +{ + struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries; + u64 phys_page = (pte & XE_PTE_ADDR_MASK) >> XE_PTE_SHIFT; + int num_entries = prl->num_entries; + u32 reclamation_size; + + xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL); + xe_tile_assert(tile, reclaim_entries); + xe_tile_assert(tile, num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1); + + if (!xe_page_reclaim_list_valid(prl)) + return -EINVAL; + + /** + * reclamation_size indicates the size of the page to be + * invalidated and flushed from non-coherent cache. + * Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes. + * Only 4K, 64K (level 0), and 2M pages are supported by hardware for page reclaim + */ + if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) { + reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K); /* reclamation_size = 0 */ + } else if (xe_child->level == 0) { + reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */ + } else if (is_2m_pte(xe_child)) { + reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M); /* reclamation_size = 9 */ + } else { + xe_page_reclaim_list_invalidate(prl); + vm_dbg(&tile_to_xe(tile)->drm, + "PRL invalidate: unsupported PTE level=%u pte=%#llx\n", + xe_child->level, pte); + return -EINVAL; + } + + reclaim_entries[num_entries].qw = + FIELD_PREP(XE_PAGE_RECLAIM_VALID, 1) | + FIELD_PREP(XE_PAGE_RECLAIM_SIZE, reclamation_size) | + FIELD_PREP(XE_PAGE_RECLAIM_ADDR_LO, phys_page) | + FIELD_PREP(XE_PAGE_RECLAIM_ADDR_HI, phys_page >> 20); + prl->num_entries++; + vm_dbg(&tile_to_xe(tile)->drm, + "PRL add entry: level=%u pte=%#llx reclamation_size=%u prl_idx=%d\n", + xe_child->level, pte, reclamation_size, num_entries); + + return 0; +} + static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, unsigned int level, u64 addr, u64 next, struct xe_ptw **child, @@ -1579,11 +1645,48 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt_walk *walk) { struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_device *xe = tile_to_xe(xe_walk->tile); XE_WARN_ON(!*child); XE_WARN_ON(!level); + /* Check for leaf node */ + if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + !xe_child->base.children) { + struct iosys_map *leaf_map = &xe_child->bo->vmap; + pgoff_t first = xe_pt_offset(addr, 0, walk); + pgoff_t count = xe_pt_num_entries(addr, next, 0, walk); - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); + for (pgoff_t i = 0; i < count; i++) { + u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), 
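/*
 * Worked example (editor's sketch, not part of the patch): with the usual
 * 4 KiB PTE granule (XE_PTE_SHIFT == 12, as the inline "reclamation_size"
 * comments above imply), page_size = 2^(reclamation_size + XE_PTE_SHIFT)
 * yields exactly the values used in generate_reclaim_entry():
 *
 *   SZ_4K : ilog2(SZ_4K)  - XE_PTE_SHIFT = 12 - 12 = 0
 *   SZ_64K: ilog2(SZ_64K) - XE_PTE_SHIFT = 16 - 12 = 4
 *   SZ_2M : ilog2(SZ_2M)  - XE_PTE_SHIFT = 21 - 12 = 9
 *
 * Any other leaf size cannot be encoded, which is why unsupported levels
 * invalidate the whole reclaim list instead of emitting an entry. The
 * arithmetic could be pinned down at compile time, e.g.:
 */
static_assert(ilog2(SZ_4K) - 12 == 0);
static_assert(ilog2(SZ_64K) - 12 == 4);
static_assert(ilog2(SZ_2M) - 12 == 9);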
u64); + int ret; + + /* Account for NULL terminated entry on end (-1) */ + if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) { + ret = generate_reclaim_entry(xe_walk->tile, xe_walk->prl, + pte, xe_child); + if (ret) + break; + } else { + /* overflow, mark as invalid */ + xe_page_reclaim_list_invalidate(xe_walk->prl); + vm_dbg(&xe->drm, + "PRL invalidate: overflow while adding pte=%#llx", + pte); + break; + } + } + } + + /* If aborting page walk early, invalidate PRL since PTE may be dropped from this abort */ + if (xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk) && + xe_walk->prl && level > 1 && xe_child->base.children && xe_child->num_live != 0) { + xe_page_reclaim_list_invalidate(xe_walk->prl); + vm_dbg(&xe->drm, + "PRL invalidate: kill at level=%u addr=%#llx next=%#llx num_live=%u\n", + level, addr, next, xe_child->num_live); + } return 0; } @@ -1654,6 +1757,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, { u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma); u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma); + struct xe_vm_pgtable_update_op *pt_update_op = + container_of(entries, struct xe_vm_pgtable_update_op, entries[0]); struct xe_pt_stage_unbind_walk xe_walk = { .base = { .ops = &xe_pt_stage_unbind_ops, @@ -1665,6 +1770,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, .modified_start = start, .modified_end = end, .wupd.entries = entries, + .prl = pt_update_op->prl, }; struct xe_pt *pt = vm->pt_root[tile->id]; @@ -1897,6 +2003,7 @@ static int unbind_op_prepare(struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, struct xe_vma *vma) { + struct xe_device *xe = tile_to_xe(tile); u32 current_op = pt_update_ops->current_op; struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; int err; @@ -1914,6 +2021,17 @@ static int unbind_op_prepare(struct xe_tile *tile, pt_op->vma = vma; pt_op->bind = false; pt_op->rebind = false; + /* + * Maintain one PRL located in pt_update_ops that all others in unbind op reference. + * Ensure that PRL is allocated only once, and if invalidated, remains an invalidated PRL. + */ + if (xe->info.has_page_reclaim_hw_assist && + xe_page_reclaim_list_is_new(&pt_update_ops->prl)) + xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl); + + /* Page reclaim may not be needed due to other features, so skip the corresponding VMA */ + pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl) && + !xe_page_reclaim_skip(tile, vma)) ? &pt_update_ops->prl : NULL; err = vma_reserve_fences(tile_to_xe(tile), vma); if (err) @@ -1979,6 +2097,7 @@ static int unbind_range_prepare(struct xe_vm *vm, pt_op->vma = XE_INVALID_VMA; pt_op->bind = false; pt_op->rebind = false; + pt_op->prl = NULL; pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range, pt_op->entries); @@ -2096,6 +2215,7 @@ xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) init_llist_head(&pt_update_ops->deferred); pt_update_ops->start = ~0x0ull; pt_update_ops->last = 0x0ull; + xe_page_reclaim_list_init(&pt_update_ops->prl); } /** @@ -2393,6 +2513,17 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) goto kill_vm_tile1; } update.ijob = ijob; + /* + * Only add page reclaim for the primary GT. Media GT does not have + * any PPC to flush, so enabling the PPC flush bit for media is + * effectively a NOP and provides no performance benefit nor + * interfere with primary GT. 
+ */ + if (xe_page_reclaim_list_valid(&pt_update_ops->prl)) { + xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl); + /* Release ref from alloc, job will now handle it */ + xe_page_reclaim_list_invalidate(&pt_update_ops->prl); + } if (tile->media_gt) { dep_scheduler = to_dep_scheduler(q, tile->media_gt); @@ -2518,6 +2649,8 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) &vops->pt_update_ops[tile->id]; int i; + xe_page_reclaim_entries_put(pt_update_ops->prl.entries); + lockdep_assert_held(&vops->vm->lock); xe_vm_assert_held(vops->vm); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 881f01e14db8..88fabf8e2655 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -8,6 +8,7 @@ #include +#include "xe_page_reclaim.h" #include "xe_pt_walk.h" struct xe_bo; @@ -79,6 +80,8 @@ struct xe_vm_pgtable_update_op { struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; /** @vma: VMA for operation, operation not valid if NULL */ struct xe_vma *vma; + /** @prl: Backing pointer to page reclaim list of pt_update_ops */ + struct xe_page_reclaim_list *prl; /** @num_entries: number of entries for this update operation */ u32 num_entries; /** @bind: is a bind */ @@ -95,6 +98,8 @@ struct xe_vm_pgtable_update_ops { struct llist_head deferred; /** @q: exec queue for PT operations */ struct xe_exec_queue *q; + /** @prl: embedded page reclaim list */ + struct xe_page_reclaim_list prl; /** @start: start address of ops */ u64 start; /** @last: last address of ops */ diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index bdbdbbf6a678..508f4c128a48 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -58,10 +58,9 @@ bool xe_pxp_is_enabled(const struct xe_pxp *pxp) static bool pxp_prerequisites_done(const struct xe_pxp *pxp) { struct xe_gt *gt = pxp->gt; - unsigned int fw_ref; bool ready; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); /* * If force_wake fails we could falsely report the prerequisites as not @@ -71,14 +70,12 @@ static bool pxp_prerequisites_done(const struct xe_pxp *pxp) * PXP. Therefore, we can just log the force_wake error and not escalate * it. 
*/ - XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)); + XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)); /* PXP requires both HuC authentication via GSC and GSC proxy initialized */ ready = xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GSC) && xe_gsc_proxy_init_done(>->uc.gsc); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return ready; } @@ -104,13 +101,12 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp) xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status)) return -EIO; - xe_pm_runtime_get(pxp->xe); + guard(xe_pm_runtime)(pxp->xe); /* PXP requires both HuC loaded and GSC proxy initialized */ if (pxp_prerequisites_done(pxp)) ret = 1; - xe_pm_runtime_put(pxp->xe); return ret; } @@ -135,35 +131,28 @@ static void pxp_invalidate_queues(struct xe_pxp *pxp); static int pxp_terminate_hw(struct xe_pxp *pxp) { struct xe_gt *gt = pxp->gt; - unsigned int fw_ref; int ret = 0; drm_dbg(&pxp->xe->drm, "Terminating PXP\n"); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { - ret = -EIO; - goto out; - } + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) + return -EIO; /* terminate the hw session */ ret = xe_pxp_submit_session_termination(pxp, ARB_SESSION); if (ret) - goto out; + return ret; ret = pxp_wait_for_session_state(pxp, ARB_SESSION, false); if (ret) - goto out; + return ret; /* Trigger full HW cleanup */ xe_mmio_write32(>->mmio, KCR_GLOBAL_TERMINATE, 1); /* now we can tell the GSC to clean up its own state */ - ret = xe_pxp_submit_session_invalidation(&pxp->gsc_res, ARB_SESSION); - -out: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return ret; + return xe_pxp_submit_session_invalidation(&pxp->gsc_res, ARB_SESSION); } static void mark_termination_in_progress(struct xe_pxp *pxp) @@ -326,14 +315,12 @@ static int kcr_pxp_set_status(const struct xe_pxp *pxp, bool enable) { u32 val = enable ? 
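/*
 * Illustrative sketch, not part of the patch: xe_pxp_get_readiness_status()
 * above (and several SR-IOV debugfs/sysfs handlers later in this patch)
 * replace xe_pm_runtime_get()/xe_pm_runtime_put() pairs with the cleanup.h
 * based guard(xe_pm_runtime)(xe), which drops the runtime-PM reference on
 * every return path. A hypothetical helper using the same pattern:
 */
static int example_call_awake(struct xe_device *xe,
			      int (*op)(struct xe_device *xe))
{
	guard(xe_pm_runtime)(xe);

	/* Device is guaranteed awake here; the reference is put on return */
	return op(xe);
}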
_MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES) : _MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES); - unsigned int fw_ref; - fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(pxp->gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) return -EIO; xe_mmio_write32(&pxp->gt->mmio, KCR_INIT, val); - xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref); return 0; } @@ -453,34 +440,28 @@ int xe_pxp_init(struct xe_device *xe) static int __pxp_start_arb_session(struct xe_pxp *pxp) { int ret; - unsigned int fw_ref; - fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(pxp->gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) return -EIO; - if (pxp_session_is_in_play(pxp, ARB_SESSION)) { - ret = -EEXIST; - goto out_force_wake; - } + if (pxp_session_is_in_play(pxp, ARB_SESSION)) + return -EEXIST; ret = xe_pxp_submit_session_init(&pxp->gsc_res, ARB_SESSION); if (ret) { drm_err(&pxp->xe->drm, "Failed to init PXP arb session: %pe\n", ERR_PTR(ret)); - goto out_force_wake; + return ret; } ret = pxp_wait_for_session_state(pxp, ARB_SESSION, true); if (ret) { drm_err(&pxp->xe->drm, "PXP ARB session failed to go in play%pe\n", ERR_PTR(ret)); - goto out_force_wake; + return ret; } drm_dbg(&pxp->xe->drm, "PXP ARB session is active\n"); - -out_force_wake: - xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref); - return ret; + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 1c0915e2cc16..75490683bad2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -122,7 +122,6 @@ query_engine_cycles(struct xe_device *xe, __ktime_func_t cpu_clock; struct xe_hw_engine *hwe; struct xe_gt *gt; - unsigned int fw_ref; if (IS_SRIOV_VF(xe)) return -EOPNOTSUPP; @@ -158,17 +157,14 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -EIO; + xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) { + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) + return -EIO; + + hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, + &resp.cpu_delta, cpu_clock); } - hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp, - &resp.cpu_delta, cpu_clock); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - if (GRAPHICS_VER(xe) >= 20) resp.width = 64; else @@ -342,6 +338,9 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; + if (GRAPHICS_VER(xe) >= 20) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = @@ -686,7 +685,9 @@ static int query_oa_units(struct xe_device *xe, du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | - DRM_XE_OA_CAPS_OAM; + DRM_XE_OA_CAPS_OAM | + DRM_XE_OA_CAPS_OA_UNIT_GT_ID; + du->gt_id = u->gt->info.id; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { 
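/*
 * Illustrative sketch, not part of the patch: query_engine_cycles() above
 * uses xe_with_force_wake(), the block-scoped companion to the
 * CLASS(xe_force_wake, ...) guard seen in the PAT dump callbacks; the
 * wakeref is released when execution leaves the block, including on an
 * early return. A minimal hypothetical user:
 */
static int example_read_with_all_domains(struct xe_gt *gt, struct xe_reg reg,
					 u32 *out)
{
	xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) {
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			return -EIO;

		*out = xe_mmio_read32(&gt->mmio, reg);
	}

	return 0;
}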
if (!xe_hw_engine_is_reserved(hwe) && diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index fc8447a838c4..1a465385f909 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -168,7 +168,6 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) { struct xe_reg_sr_entry *entry; unsigned long reg; - unsigned int fw_ref; if (xa_empty(&sr->xa)) return; @@ -178,20 +177,14 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_force_wake; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); + return; + } xa_for_each(&sr->xa, reg, entry) apply_one_mmio(gt, entry); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); } /** diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 7ca360b2c20d..1391cb6ec9c6 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -9,6 +9,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" +#include "xe_device.h" #include "xe_gt_types.h" #include "xe_gt_printk.h" #include "xe_platform_types.h" @@ -26,6 +27,13 @@ static bool match_not_render(const struct xe_device *xe, return hwe->class != XE_ENGINE_CLASS_RENDER; } +static bool match_has_mert(const struct xe_device *xe, + const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_device_has_mert((struct xe_device *)xe); +} + static const struct xe_rtp_entry_sr register_whitelist[] = { { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), @@ -67,28 +75,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) }, - { XE_RTP_NAME("oa_reg_render"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, - RING_FORCE_TO_NONPRIV_ACCESS_RW), - WHITELIST(OAG_OASTATUS, - RING_FORCE_TO_NONPRIV_ACCESS_RD), - WHITELIST(OAG_OAHEADPTR, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4)) - }, - { XE_RTP_NAME("oa_reg_compute"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), - ENGINE_CLASS(COMPUTE)), - XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, - RING_FORCE_TO_NONPRIV_ACCESS_RW), - WHITELIST(OAG_OASTATUS, - RING_FORCE_TO_NONPRIV_ACCESS_RD), - WHITELIST(OAG_OAHEADPTR, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4)) - }, { XE_RTP_NAME("14024997852"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(FF_MODE, @@ -96,6 +82,57 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { WHITELIST(VFLSKPD, RING_FORCE_TO_NONPRIV_ACCESS_RW)) }, + +#define WHITELIST_OA_MMIO_TRG(trg, status, head) \ + WHITELIST(trg, RING_FORCE_TO_NONPRIV_ACCESS_RW), \ + WHITELIST(status, RING_FORCE_TO_NONPRIV_ACCESS_RD), \ + WHITELIST(head, RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4) + +#define 
WHITELIST_OAG_MMIO_TRG \ + WHITELIST_OA_MMIO_TRG(OAG_MMIOTRIGGER, OAG_OASTATUS, OAG_OAHEADPTR) + +#define WHITELIST_OAM_MMIO_TRG \ + WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SAG_BASE_ADJ), \ + OAM_STATUS(XE_OAM_SAG_BASE_ADJ), \ + OAM_HEAD_POINTER(XE_OAM_SAG_BASE_ADJ)), \ + WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SCMI_0_BASE_ADJ), \ + OAM_STATUS(XE_OAM_SCMI_0_BASE_ADJ), \ + OAM_HEAD_POINTER(XE_OAM_SCMI_0_BASE_ADJ)), \ + WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SCMI_1_BASE_ADJ), \ + OAM_STATUS(XE_OAM_SCMI_1_BASE_ADJ), \ + OAM_HEAD_POINTER(XE_OAM_SCMI_1_BASE_ADJ)) + +#define WHITELIST_OA_MERT_MMIO_TRG \ + WHITELIST_OA_MMIO_TRG(OAMERT_MMIO_TRG, OAMERT_STATUS, OAMERT_HEAD_POINTER) + + { XE_RTP_NAME("oag_mmio_trg_rcs"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST_OAG_MMIO_TRG) + }, + { XE_RTP_NAME("oag_mmio_trg_ccs"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(WHITELIST_OAG_MMIO_TRG) + }, + { XE_RTP_NAME("oam_mmio_trg_vcs"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(WHITELIST_OAM_MMIO_TRG) + }, + { XE_RTP_NAME("oam_mmio_trg_vecs"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(VIDEO_ENHANCE)), + XE_RTP_ACTIONS(WHITELIST_OAM_MMIO_TRG) + }, + { XE_RTP_NAME("oa_mert_mmio_trg_ccs"), + XE_RTP_RULES(FUNC(match_has_mert), ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(WHITELIST_OA_MERT_MMIO_TRG) + }, + { XE_RTP_NAME("oa_mert_mmio_trg_bcs"), + XE_RTP_RULES(FUNC(match_has_mert), ENGINE_CLASS(COPY)), + XE_RTP_ACTIONS(WHITELIST_OA_MERT_MMIO_TRG) + }, }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index ac0c6dcffe15..957b9e2fd138 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -12,7 +12,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" -#include "xe_exec_queue_types.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -135,12 +135,11 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, return i; } -static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, - int i) +static int emit_pipe_invalidate(struct xe_exec_queue *q, u32 mask_flags, + bool invalidate_tlb, u32 *dw, int i) { u32 flags0 = 0; - u32 flags1 = PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + u32 flags1 = PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_VF_CACHE_INVALIDATE | @@ -152,6 +151,11 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, if (invalidate_tlb) flags1 |= PIPE_CONTROL_TLB_INVALIDATE; + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; + flags1 &= ~mask_flags; if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) @@ -175,54 +179,52 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) { - struct xe_gt *gt = job->q->gt; + struct xe_exec_queue *q = job->q; + struct xe_gt *gt = q->gt; bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - u32 flags; + u32 flags0, flags1; if (XE_GT_WA(gt, 
14016712196)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); - flags = (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | + flags0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + flags1 = (PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); if (XE_GT_WA(gt, 1409600907)) - flags |= PIPE_CONTROL_DEPTH_STALL; + flags1 |= PIPE_CONTROL_DEPTH_STALL; if (lacks_render) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + flags1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + flags1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0); + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; + + return emit_pipe_control(dw, i, flags0, flags1, 0, 0); } -static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i) +static int emit_pipe_imm_ggtt(struct xe_exec_queue *q, u32 addr, u32 value, + bool stall_only, u32 *dw, int i) { - if (hwe->class != XE_ENGINE_CLASS_RENDER) - return i; - - if (XE_GT_WA(hwe->gt, 16020292621)) - i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, - RING_NOPID(hwe->mmio_base).addr, 0); - - return i; -} - -static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, - int i) -{ - u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE; + u32 flags0 = 0, flags1 = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; if (!stall_only) - flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags1 |= PIPE_CONTROL_FLUSH_ENABLE; - return emit_pipe_control(dw, i, 0, flags, addr, value); + if (xe_exec_queue_is_multi_queue(q)) + flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE; + else + flags1 |= PIPE_CONTROL_CS_STALL; + + return emit_pipe_control(dw, i, flags0, flags1, addr, value); } static u32 get_ppgtt_flag(struct xe_sched_job *job) @@ -371,7 +373,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. 
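/*
 * Illustrative sketch, not part of the patch: emit_pipe_invalidate(),
 * emit_render_cache_flush() and emit_pipe_imm_ggtt() above all encode the
 * same rule -- multi queue exec queues request PIPE_CONTROL0_QUEUE_DRAIN_MODE
 * in the first flags dword instead of PIPE_CONTROL_CS_STALL in the second.
 * A hypothetical helper making that selection explicit:
 */
static void example_pick_stall_mode(struct xe_exec_queue *q,
				    u32 *flags0, u32 *flags1)
{
	if (xe_exec_queue_is_multi_queue(q))
		*flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
	else
		*flags1 |= PIPE_CONTROL_CS_STALL;
}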
*/ - i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i); + i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) @@ -391,12 +393,10 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, job->user_fence.value, dw, i); - i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); + i = emit_pipe_imm_ggtt(job->q, xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); i = emit_user_interrupt(dw, i); - i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i); - xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 63a5263dcf1b..a87c1436c7c1 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) kvfree(sa_manager->cpu_ptr); sa_manager->bo = NULL; + sa_manager->shadow = NULL; } /** @@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) * @size: number of bytes to allocate * @guard: number of bytes to exclude from suballocations * @align: alignment for each suballocated chunk + * @flags: flags for suballocator * * Prepares the suballocation manager for suballocations. * * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure. */ -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align) +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, + u32 guard, u32 align, u32 flags) { struct xe_device *xe = tile_to_xe(tile); struct xe_sa_manager *sa_manager; @@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } + if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) { + struct xe_bo *shadow; + + ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard); + if (ret) + return ERR_PTR(ret); + + shadow = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(shadow)) { + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n", + size / SZ_1K, shadow); + return ERR_CAST(shadow); + } + sa_manager->shadow = shadow; + } + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager); @@ -88,6 +111,48 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 return sa_manager; } +/** + * xe_sa_bo_swap_shadow() - Swap the SA BO with shadow BO. + * @sa_manager: the XE sub allocator manager + * + * Swaps the sub-allocator primary buffer object with shadow buffer object. + * + * Return: None. + */ +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager) +{ + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); + + xe_assert(xe, sa_manager->shadow); + lockdep_assert_held(&sa_manager->swap_guard); + + swap(sa_manager->bo, sa_manager->shadow); + if (!sa_manager->bo->vmap.is_iomem) + sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr; +} + +/** + * xe_sa_bo_sync_shadow() - Sync the SA Shadow BO with primary BO. + * @sa_bo: the sub-allocator buffer object. + * + * Synchronize sub-allocator shadow buffer object with primary buffer object. + * + * Return: None. 
+ */ +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); + + xe_assert(xe, sa_manager->shadow); + lockdep_assert_held(&sa_manager->swap_guard); + + xe_map_memcpy_to(xe, &sa_manager->shadow->vmap, + drm_suballoc_soffset(sa_bo), + xe_sa_bo_cpu_addr(sa_bo), + drm_suballoc_size(sa_bo)); +} + /** * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags. * @sa_manager: the &xe_sa_manager diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 1be744350836..05e9a4e00e78 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -14,12 +14,14 @@ struct dma_fence; struct xe_tile; -struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align); +#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0) +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, + u32 guard, u32 align, u32 flags); struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp); static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) { - return __xe_sa_bo_manager_init(tile, size, SZ_4K, align); + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0); } /** @@ -69,4 +71,18 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa) drm_suballoc_soffset(sa); } +void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager); +void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo); + +/** + * xe_sa_bo_swap_guard() - Retrieve the SA BO swap guard within sub-allocator. + * @sa_manager: the &xe_sa_manager + * + * Return: Sub alloctor swap guard mutex. + */ +static inline struct mutex *xe_sa_bo_swap_guard(struct xe_sa_manager *sa_manager) +{ + return &sa_manager->swap_guard; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index cb7238799dcb..1085c9c37d6b 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -12,6 +12,9 @@ struct xe_bo; struct xe_sa_manager { struct drm_suballoc_manager base; struct xe_bo *bo; + struct xe_bo *shadow; + /** @swap_guard: Timeline guard updating @bo and @shadow */ + struct mutex swap_guard; void *cpu_ptr; bool is_iomem; }; diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c index bab994696896..2cefefaed9ba 100644 --- a/drivers/gpu/drm/xe/xe_sriov_packet.c +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -358,7 +358,7 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, #define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \ GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN) -static size_t pf_descriptor_init(struct xe_device *xe, unsigned int vfid) +static int pf_descriptor_init(struct xe_device *xe, unsigned int vfid) { struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid); struct xe_sriov_packet *data; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 7c779d63179f..72423bb17e6f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -90,6 +90,7 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + struct xe_mert *mert = &xe_device_get_root_tile(xe)->mert; int err; xe_assert(xe, IS_SRIOV_PF(xe)); @@ -111,6 +112,9 @@ int xe_sriov_pf_init_early(struct xe_device *xe) xe_sriov_pf_service_init(xe); + 
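/*
 * Illustrative sketch, not part of the patch: with
 * XE_SA_BO_MANAGER_FLAG_SHADOW the suballocator keeps a second pinned BO
 * plus a swap_guard mutex. A caller mirrors individual suballocations into
 * the shadow with xe_sa_bo_sync_shadow() and can then exchange primary and
 * shadow with xe_sa_bo_swap_shadow(), all under xe_sa_bo_swap_guard().
 * Hypothetical usage, assuming sa_bo was allocated from sa_manager:
 */
static void example_publish_to_shadow(struct xe_sa_manager *sa_manager,
				      struct drm_suballoc *sa_bo)
{
	mutex_lock(xe_sa_bo_swap_guard(sa_manager));
	xe_sa_bo_sync_shadow(sa_bo);		/* copy current contents into the shadow */
	xe_sa_bo_swap_shadow(sa_manager);	/* shadow becomes the active BO */
	mutex_unlock(xe_sa_bo_swap_guard(sa_manager));
}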
spin_lock_init(&mert->lock); + init_completion(&mert->tlb_inv_done); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c index bad751217e1e..e84bdde9bc80 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c @@ -70,9 +70,8 @@ static ssize_t from_file_write_to_xe_call(struct file *file, const char __user * if (ret < 0) return ret; if (yes) { - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = call(xe); - xe_pm_runtime_put(xe); } if (ret < 0) return ret; @@ -209,9 +208,8 @@ static ssize_t from_file_write_to_vf_call(struct file *file, const char __user * if (ret < 0) return ret; if (yes) { - xe_pm_runtime_get(xe); + guard(xe_pm_runtime)(xe); ret = call(xe, vfid); - xe_pm_runtime_put(xe); } if (ret < 0) return ret; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c index c0b767ac735c..3d140506ba36 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -389,16 +389,12 @@ static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *a struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); struct xe_device *xe = vkobj->xe; - ssize_t ret; if (!vattr->store) return -EPERM; - xe_pm_runtime_get(xe); - ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count); - xe_pm_runtime_put(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count); } static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -423,18 +419,14 @@ static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *at struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); struct xe_device *xe = vkobj->xe; unsigned int vfid = vkobj->vfid; - ssize_t ret; xe_sriov_pf_assert_vfid(xe, vfid); if (!vattr->store) return -EPERM; - xe_pm_runtime_get(xe); - ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count); - xe_pm_runtime_get(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count); } static const struct sysfs_ops xe_sriov_dev_sysfs_ops = { diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 284ce37ca92d..1b75405b8d02 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -49,11 +49,13 @@ * * As soon as Virtual GPU of the VM starts, the VF driver within receives * the MIGRATED interrupt and schedules post-migration recovery worker. - * That worker queries GuC for new provisioning (using MMIO communication), + * That worker sends `VF2GUC_RESFIX_START` action along with non-zero + * marker, queries GuC for new provisioning (using MMIO communication), * and applies fixups to any non-virtualized resources used by the VF. 
* * When the VF driver is ready to continue operation on the newly connected - * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to + * hardware, it sends `VF2GUC_RESFIX_DONE` action along with the same + * marker which was sent with `VF2GUC_RESFIX_START` which causes it to * enter the long awaited `VF_RUNNING` state, and therefore start handling * CTB messages and scheduling workloads from the VF:: * @@ -102,12 +104,17 @@ * | [ ] new VF provisioning [ ] * | [ ]---------------------------> [ ] * | | [ ] + * | | VF2GUC_RESFIX_START [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] success [ ] + * | [ ]---------------------------> [ ] * | | VF driver applies post [ ] * | | migration fixups -------[ ] * | | | [ ] * | | -----> [ ] * | | [ ] - * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] + * | | VF2GUC_RESFIX_DONE [ ] * | [ ] <---------------------------[ ] * | [ ] [ ] * | [ ] GuC sets new VF state to [ ] @@ -118,6 +125,55 @@ * | [ ]---------------------------> [ ] * | | | * | | | + * + * Handling of VF double migration flow is shown below:: + * + * GuC1 VF + * | | + * | [ ]<--- start fixups + * | VF2GUC_RESFIX_START(marker) [ ] + * [ ] <-------------------------------------------[ ] + * [ ] [ ] + * [ ]---\ [ ] + * [ ] store marker [ ] + * [ ]<--/ [ ] + * [ ] [ ] + * [ ] success [ ] + * [ ] ------------------------------------------> [ ] + * | [ ] + * | [ ]---\ + * | [ ] do fixups + * | [ ]<--/ + * | [ ] + * -------------- VF paused / saved ---------------- + * : + * + * GuC2 + * | + * ----------------- VF restored ------------------ + * | + * [ ] + * [ ]---\ + * [ ] reset marker + * [ ]<--/ + * [ ] + * ----------------- VF resumed ------------------ + * | [ ] + * | [ ] + * | VF2GUC_RESFIX_DONE(marker) [ ] + * [ ] <-------------------------------------------[ ] + * [ ] [ ] + * [ ]---\ [ ] + * [ ] check marker [ ] + * [ ] (mismatch) [ ] + * [ ]<--/ [ ] + * [ ] [ ] + * [ ] RESPONSE_VF_MIGRATED [ ] + * [ ] ------------------------------------------> [ ] + * | [ ]---\ + * | [ ] reschedule fixups + * | [ ]<--/ + * | | */ /** @@ -170,6 +226,26 @@ void xe_sriov_vf_init_early(struct xe_device *xe) vf_migration_init_early(xe); } +static int vf_migration_init_late(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_uc_fw_version guc_version; + + if (!xe_sriov_vf_migration_supported(xe)) + return 0; + + xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 27, 0)) { + xe_sriov_vf_migration_disable(xe, + "requires GuC ABI >= 1.27.0, but only %u.%u.%u found", + guc_version.major, guc_version.minor, + guc_version.patch); + return 0; + } + + return xe_sriov_vf_ccs_init(xe); +} + /** * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. * @xe: the &xe_device to initialize @@ -180,7 +256,7 @@ void xe_sriov_vf_init_early(struct xe_device *xe) */ int xe_sriov_vf_init_late(struct xe_device *xe) { - return xe_sriov_vf_ccs_init(xe); + return vf_migration_init_late(xe); } static int sa_info_vf_ccs(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 797a4b866226..052a5071e69f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", ctx->ctx_id ? 
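/*
 * Illustrative sketch, not part of the patch: the double-migration handling
 * described in the DOC above brackets the recovery fixups with a marker
 * handshake. Every helper below is hypothetical; only the
 * VF2GUC_RESFIX_START / VF2GUC_RESFIX_DONE actions, the "same non-zero
 * marker" rule and the GuC 1.27.0+ requirement come from this series.
 */
static void example_post_migration_recovery(struct xe_gt *gt)
{
	u32 marker = example_next_marker(gt);	/* hypothetical, never zero */

	example_send_resfix_start(gt, marker);	/* VF2GUC_RESFIX_START(marker) */
	example_apply_fixups(gt);		/* GGTT/CTB/etc. fixups */

	/*
	 * VF2GUC_RESFIX_DONE(marker): if GuC reports a marker mismatch, the
	 * VF was migrated again in between and the fixups must be redone.
	 */
	if (example_send_resfix_done(gt, marker))
		example_reschedule_recovery(gt);
}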
"Restore" : "Save", bb_pool_size / SZ_1M); - sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16); + sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16, + XE_SA_BO_MANAGER_FLAG_SHADOW); if (IS_ERR(sa_manager)) { xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", @@ -162,9 +163,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) offset = 0; xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, bb_pool_size); + xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP, + bb_pool_size); offset = bb_pool_size - sizeof(u32); xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); + xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END); ctx->mem.ccs_bb_pool = sa_manager; @@ -381,6 +385,18 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe) return err; } +#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) +{ + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); + + xe_device_wmb(xe); + xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr); + xe_device_wmb(xe); +} + /** * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. * @bo: the &buffer object to which batch buffer commands will be added. @@ -441,9 +457,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) if (!bb) continue; - memset(bb->cs, MI_NOOP, bb->len * sizeof(u32)); - xe_bb_free(bb, NULL); - bo->bb_ccs[ctx_id] = NULL; + xe_migrate_ccs_rw_copy_clear(bo, ctx_id); } return 0; } @@ -463,8 +477,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) if (!IS_VF_CCS_READY(xe)) return; - xe_pm_runtime_get(xe); - + guard(xe_pm_runtime)(xe); for_each_ccs_rw_ctx(ctx_id) { bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; if (!bb_pool) @@ -475,6 +488,4 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); drm_puts(p, "\n"); } - - xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h index f8ca6efce9ee..00e58b36c510 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_register_context(struct xe_device *xe); void xe_sriov_vf_ccs_rebase(struct xe_device *xe); void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); +void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx); static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1662bfddd4bc..4c716182ad3b 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -16,11 +16,10 @@ #include "xe_heci_gsc.h" #include "xe_i2c.h" #include "xe_mmio.h" +#include "xe_nvm.h" #include "xe_pcode_api.h" #include "xe_vsec.h" -#define MAX_SCRATCH_MMIO 8 - /** * DOC: Survivability Mode * @@ -48,19 +47,43 @@ * * Refer :ref:`xe_configfs` for more details on how to use configfs * - * Survivability mode is indicated by the below admin-only readable sysfs which provides additional - * debug information:: + * Survivability mode is indicated by the below admin-only readable 
sysfs entry. It + * provides information about the type of survivability mode (Boot/Runtime). * - * /sys/bus/pci/devices//survivability_mode + * .. code-block:: shell * - * Capability Information: - * Provides boot status - * Postcode Information: - * Provides information about the failure - * Overflow Information - * Provides history of previous failures - * Auxiliary Information - * Certain failures may have information in addition to postcode information + * # cat /sys/bus/pci/devices//survivability_mode + * Boot + * + * + * Any additional debug information if present will be visible under the directory + * ``survivability_info``:: + * + * /sys/bus/pci/devices//survivability_info/ + * ├── aux_info0 + * ├── aux_info1 + * ├── aux_info2 + * ├── aux_info3 + * ├── aux_info4 + * ├── capability_info + * ├── fdo_mode + * ├── postcode_trace + * └── postcode_trace_overflow + * + * This directory has the following attributes + * + * - ``capability_info`` : Indicates Boot status and support for additional information + * + * - ``postcode_trace``, ``postcode_trace_overflow`` : Each postcode is a 8bit value and + * represents a boot failure event. When a new failure event is logged by PCODE the + * existing postcodes are shifted left. These entries provide a history of 8 postcodes. + * + * - ``aux_info`` : Some failures have additional debug information + * + * - ``fdo_mode`` : To allow recovery in scenarios where MEI itself fails, a new SPI Flash + * Descriptor Override (FDO) mode is added in v2 survivability breadcrumbs. This mode is enabled + * by PCODE and provides the ability to directly update the firmware via SPI Driver without + * any dependency on MEI. Xe KMD initializes the nvm aux driver if FDO mode is enabled. * * Runtime Survivability * ===================== @@ -68,61 +91,77 @@ * Certain runtime firmware errors can cause the device to enter a wedged state * (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation. * Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and - * is indicated by the presence of survivability mode sysfs:: - * - * /sys/bus/pci/devices//survivability_mode - * + * is indicated by the presence of survivability mode sysfs. * Survivability mode sysfs provides information about the type of survivability mode. * + * .. code-block:: shell + * + * # cat /sys/bus/pci/devices//survivability_mode + * Runtime + * * When such errors occur, userspace is notified with the drm device wedged uevent and runtime * survivability mode. User can then initiate a firmware flash using userspace tools like fwupd * to restore device to normal operation. 
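/*
 * Illustrative sketch, not part of the patch: decoding the capability
 * breadcrumb (PCODE_SCRATCH0) with the field definitions added to
 * xe_pcode_api.h earlier in this patch. The capability dword carries the
 * breadcrumb version, boot status, the FDO bit and the capability flags;
 * AUXINFO_REG_OFFSET names the first auxiliary scratch register, and each
 * aux register chains to the next via AUXINFO_HISTORY_OFFSET.
 * example_decode_capability() is hypothetical.
 */
static void example_decode_capability(struct drm_printer *p, u32 cap)
{
	drm_printf(p, "breadcrumb v%u, boot status %u, fdo %s\n",
		   REG_FIELD_GET(BREADCRUMB_VERSION, cap),
		   REG_FIELD_GET(BOOT_STATUS, cap),
		   str_enabled_disabled(cap & FDO_MODE));
	drm_printf(p, "history %s, overflow %s, auxinfo %s (first aux reg %u)\n",
		   str_yes_no(cap & HISTORY_TRACKING),
		   str_yes_no(cap & OVERFLOW_SUPPORT),
		   str_yes_no(cap & AUXINFO_SUPPORT),
		   REG_FIELD_GET(AUXINFO_REG_OFFSET, cap));
}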
*/ -static u32 aux_history_offset(u32 reg_value) +static const char * const reg_map[] = { + [CAPABILITY_INFO] = "Capability Info", + [POSTCODE_TRACE] = "Postcode trace", + [POSTCODE_TRACE_OVERFLOW] = "Postcode trace overflow", + [AUX_INFO0] = "Auxiliary Info 0", + [AUX_INFO1] = "Auxiliary Info 1", + [AUX_INFO2] = "Auxiliary Info 2", + [AUX_INFO3] = "Auxiliary Info 3", + [AUX_INFO4] = "Auxiliary Info 4", +}; + +#define FDO_INFO (MAX_SCRATCH_REG + 1) + +struct xe_survivability_attribute { + struct device_attribute attr; + u8 index; +}; + +static struct +xe_survivability_attribute *dev_attr_to_survivability_attr(struct device_attribute *attr) { - return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value); + return container_of(attr, struct xe_survivability_attribute, attr); } -static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info, - int id, char *name) +static void set_survivability_info(struct xe_mmio *mmio, u32 *info, int id) { - strscpy(info[id].name, name, sizeof(info[id].name)); - info[id].reg = PCODE_SCRATCH(id).raw; - info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id)); + info[id] = xe_mmio_read32(mmio, PCODE_SCRATCH(id)); } static void populate_survivability_info(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info = survivability->info; + u32 *info = survivability->info; struct xe_mmio *mmio; u32 id = 0, reg_value; - char name[NAME_MAX]; - int index; mmio = xe_root_tile_mmio(xe); - set_survivability_info(mmio, info, id, "Capability Info"); - reg_value = info[id].value; + set_survivability_info(mmio, info, CAPABILITY_INFO); + reg_value = info[CAPABILITY_INFO]; + + survivability->version = REG_FIELD_GET(BREADCRUMB_VERSION, reg_value); + /* FDO mode is exposed only from version 2 */ + if (survivability->version >= 2) + survivability->fdo_mode = REG_FIELD_GET(FDO_MODE, reg_value); if (reg_value & HISTORY_TRACKING) { - id++; - set_survivability_info(mmio, info, id, "Postcode Info"); + set_survivability_info(mmio, info, POSTCODE_TRACE); - if (reg_value & OVERFLOW_SUPPORT) { - id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value); - set_survivability_info(mmio, info, id, "Overflow Info"); - } + if (reg_value & OVERFLOW_SUPPORT) + set_survivability_info(mmio, info, POSTCODE_TRACE_OVERFLOW); } + /* Traverse the linked list of aux info registers */ if (reg_value & AUXINFO_SUPPORT) { - id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value); - - for (index = 0; id && reg_value; index++, reg_value = info[id].value, - id = aux_history_offset(reg_value)) { - snprintf(name, NAME_MAX, "Auxiliary Info %d", index); - set_survivability_info(mmio, info, id, name); - } + for (id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value); + id >= AUX_INFO0 && id < MAX_SCRATCH_REG; + id = REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, info[id])) + set_survivability_info(mmio, info, id); } } @@ -130,15 +169,14 @@ static void log_survivability_info(struct pci_dev *pdev) { struct xe_device *xe = pdev_to_xe_device(pdev); struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info = survivability->info; + u32 *info = survivability->info; int id; dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n", survivability->boot_status); - for (id = 0; id < MAX_SCRATCH_MMIO; id++) { - if (info[id].reg) - dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name, - info[id].reg, info[id].value); + for (id = 0; id < MAX_SCRATCH_REG; id++) { + if (info[id]) + dev_info(&pdev->dev, "%s: 
0x%x\n", reg_map[id], info[id]); } } @@ -156,43 +194,103 @@ static ssize_t survivability_mode_show(struct device *dev, struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info = survivability->info; - int index = 0, count = 0; - count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n", - survivability->type ? "Runtime" : "Boot"); - - if (!check_boot_failure(xe)) - return count; - - for (index = 0; index < MAX_SCRATCH_MMIO; index++) { - if (info[index].reg) - count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, - info[index].reg, info[index].value); - } - - return count; + return sysfs_emit(buff, "%s\n", survivability->type ? "Runtime" : "Boot"); } static DEVICE_ATTR_ADMIN_RO(survivability_mode); +static ssize_t survivability_info_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_survivability_attribute *sa = dev_attr_to_survivability_attr(attr); + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + u32 *info = survivability->info; + + if (sa->index == FDO_INFO) + return sysfs_emit(buff, "%s\n", str_enabled_disabled(survivability->fdo_mode)); + + return sysfs_emit(buff, "0x%x\n", info[sa->index]); +} + +#define SURVIVABILITY_ATTR_RO(name, _index) \ + struct xe_survivability_attribute attr_##name = { \ + .attr = __ATTR(name, 0400, survivability_info_show, NULL), \ + .index = _index, \ + } + +static SURVIVABILITY_ATTR_RO(capability_info, CAPABILITY_INFO); +static SURVIVABILITY_ATTR_RO(postcode_trace, POSTCODE_TRACE); +static SURVIVABILITY_ATTR_RO(postcode_trace_overflow, POSTCODE_TRACE_OVERFLOW); +static SURVIVABILITY_ATTR_RO(aux_info0, AUX_INFO0); +static SURVIVABILITY_ATTR_RO(aux_info1, AUX_INFO1); +static SURVIVABILITY_ATTR_RO(aux_info2, AUX_INFO2); +static SURVIVABILITY_ATTR_RO(aux_info3, AUX_INFO3); +static SURVIVABILITY_ATTR_RO(aux_info4, AUX_INFO4); +static SURVIVABILITY_ATTR_RO(fdo_mode, FDO_INFO); + static void xe_survivability_mode_fini(void *arg) { struct xe_device *xe = arg; + struct xe_survivability *survivability = &xe->survivability; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); + if (survivability->fdo_mode) + xe_nvm_fini(xe); + + device_remove_file(dev, &dev_attr_survivability_mode); } +static umode_t survivability_info_attrs_visible(struct kobject *kobj, struct attribute *attr, + int idx) +{ + struct xe_device *xe = kdev_to_xe_device(kobj_to_dev(kobj)); + struct xe_survivability *survivability = &xe->survivability; + u32 *info = survivability->info; + + /* + * Last index in survivability_info_attrs is fdo mode and is applicable only in + * version 2 of survivability mode + */ + if (idx == MAX_SCRATCH_REG && survivability->version >= 2) + return 0400; + + if (idx < MAX_SCRATCH_REG && info[idx]) + return 0400; + + return 0; +} + +/* Attributes are ordered according to enum scratch_reg */ +static struct attribute *survivability_info_attrs[] = { + &attr_capability_info.attr.attr, + &attr_postcode_trace.attr.attr, + &attr_postcode_trace_overflow.attr.attr, + &attr_aux_info0.attr.attr, + &attr_aux_info1.attr.attr, + &attr_aux_info2.attr.attr, + &attr_aux_info3.attr.attr, + &attr_aux_info4.attr.attr, + &attr_fdo_mode.attr.attr, + NULL, +}; + +static const struct attribute_group 
survivability_info_group = { + .name = "survivability_info", + .attrs = survivability_info_attrs, + .is_visible = survivability_info_attrs_visible, +}; + static int create_survivability_sysfs(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct xe_device *xe = pdev_to_xe_device(pdev); int ret; - /* create survivability mode sysfs */ - ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); + ret = device_create_file(dev, &dev_attr_survivability_mode); if (ret) { dev_warn(dev, "Failed to create survivability sysfs files\n"); return ret; @@ -203,6 +301,12 @@ static int create_survivability_sysfs(struct pci_dev *pdev) if (ret) return ret; + if (check_boot_failure(xe)) { + ret = devm_device_add_group(dev, &survivability_info_group); + if (ret) + return ret; + } + return 0; } @@ -220,12 +324,16 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev) /* Make sure xe_heci_gsc_init() knows about survivability mode */ survivability->mode = true; - ret = xe_heci_gsc_init(xe); - if (ret) - goto err; + xe_heci_gsc_init(xe); xe_vsec_init(xe); + if (survivability->fdo_mode) { + ret = xe_nvm_init(xe); + if (ret) + goto err; + } + ret = xe_i2c_probe(xe); if (ret) goto err; @@ -235,29 +343,11 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev) return 0; err: + dev_err(dev, "Failed to enable Survivability Mode\n"); survivability->mode = false; return ret; } -static int init_survivability_mode(struct xe_device *xe) -{ - struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info; - - survivability->size = MAX_SCRATCH_MMIO; - - info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), - GFP_KERNEL); - if (!info) - return -ENOMEM; - - survivability->info = info; - - populate_survivability_info(xe); - - return 0; -} - /** * xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled * @xe: xe device instance @@ -325,9 +415,7 @@ int xe_survivability_mode_runtime_enable(struct xe_device *xe) return -EINVAL; } - ret = init_survivability_mode(xe); - if (ret) - return ret; + populate_survivability_info(xe); ret = create_survivability_sysfs(pdev); if (ret) @@ -356,17 +444,16 @@ int xe_survivability_mode_boot_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - int ret; if (!xe_survivability_mode_is_requested(xe)) return 0; - ret = init_survivability_mode(xe); - if (ret) - return ret; + populate_survivability_info(xe); - /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */ - if (survivability->boot_status == CRITICAL_FAILURE) { + /* + * v2 supports survivability mode for critical errors + */ + if (survivability->version < 2 && survivability->boot_status == CRITICAL_FAILURE) { log_survivability_info(pdev); return -ENXIO; } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h index cd65a5d167c9..bd5dc1c955ff 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -9,23 +9,29 @@ #include #include +enum scratch_reg { + CAPABILITY_INFO, + POSTCODE_TRACE, + POSTCODE_TRACE_OVERFLOW, + AUX_INFO0, + AUX_INFO1, + AUX_INFO2, + AUX_INFO3, + AUX_INFO4, + MAX_SCRATCH_REG, +}; + enum xe_survivability_type { XE_SURVIVABILITY_TYPE_BOOT, XE_SURVIVABILITY_TYPE_RUNTIME, }; -struct xe_survivability_info { - char name[NAME_MAX]; - u32 reg; - u32 value; -}; - /** * struct 
xe_survivability: Contains survivability mode information */ struct xe_survivability { - /** @info: struct that holds survivability info from scratch registers */ - struct xe_survivability_info *info; + /** @info: survivability debug info */ + u32 info[MAX_SCRATCH_REG]; /** @size: number of scratch registers */ u32 size; @@ -38,6 +44,12 @@ struct xe_survivability { /** @type: survivability type */ enum xe_survivability_type type; + + /** @fdo_mode: indicates if FDO mode is enabled */ + bool fdo_mode; + + /** @version: breadcrumb version of survivability mode */ + u8 version; }; #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 55c5a0eb82e1..93550c7c84ac 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -176,24 +176,13 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r, mmu_range); } -static s64 xe_svm_stats_ktime_us_delta(ktime_t start) -{ - return IS_ENABLED(CONFIG_DEBUG_FS) ? - ktime_us_delta(ktime_get(), start) : 0; -} - static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start) { - s64 us_delta = xe_svm_stats_ktime_us_delta(start); + s64 us_delta = xe_gt_stats_ktime_us_delta(start); xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta); } -static ktime_t xe_svm_stats_ktime_get(void) -{ - return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0; -} - static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, const struct mmu_notifier_range *mmu_range) @@ -202,7 +191,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct xe_device *xe = vm->xe; struct drm_gpusvm_range *r, *first; struct xe_tile *tile; - ktime_t start = xe_svm_stats_ktime_get(); + ktime_t start = xe_gt_stats_ktime_get(); u64 adj_start = mmu_range->start, adj_end = mmu_range->end; u8 tile_mask = 0, id; long err; @@ -285,19 +274,21 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm, return 0; } -static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end) +static void xe_vma_set_default_attributes(struct xe_vma *vma) +{ + vma->attr.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE; + vma->attr.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES; + vma->attr.pat_index = vma->attr.default_pat_index; + vma->attr.atomic_access = DRM_XE_ATOMIC_UNDEFINED; +} + +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end) { struct xe_vma *vma; - struct xe_vma_mem_attr default_attr = { - .preferred_loc = { - .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, - .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, - }, - .atomic_access = DRM_XE_ATOMIC_UNDEFINED, - }; - int err = 0; + bool has_default_attr; + int err; - vma = xe_vm_find_vma_by_addr(vm, range_start); + vma = xe_vm_find_vma_by_addr(vm, start); if (!vma) return -EINVAL; @@ -306,25 +297,30 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 return 0; } - if (xe_vma_has_default_mem_attrs(vma)) - return 0; - vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx", xe_vma_start(vma), xe_vma_end(vma)); - if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) { - default_attr.pat_index = vma->attr.default_pat_index; - default_attr.default_pat_index = vma->attr.default_pat_index; - vma->attr = default_attr; - } else { - vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx", - range_start, range_end); - err = 
xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start); - if (err) { - drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err)); - xe_vm_kill(vm, true); - return err; - } + has_default_attr = xe_vma_has_default_mem_attrs(vma); + + if (has_default_attr) { + start = xe_vma_start(vma); + end = xe_vma_end(vma); + } else if (xe_vma_start(vma) == start && xe_vma_end(vma) == end) { + xe_vma_set_default_attributes(vma); + } + + xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end); + + if (xe_vma_start(vma) == start && xe_vma_end(vma) == end && has_default_attr) + return 0; + + vm_dbg(&vm->xe->drm, "New VMA start=0x%016llx, vma_end=0x%016llx", start, end); + + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start); + if (err) { + drm_warn(&vm->xe->drm, "New VMA MAP failed: %pe\n", ERR_PTR(err)); + xe_vm_kill(vm, true); + return err; } /* @@ -435,7 +431,7 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, unsigned long npages, ktime_t start) { - s64 us_delta = xe_svm_stats_ktime_us_delta(start); + s64 us_delta = xe_gt_stats_ktime_us_delta(start); if (dir == XE_SVM_COPY_TO_VRAM) { switch (npages) { @@ -487,7 +483,7 @@ static int xe_svm_copy(struct page **pages, u64 vram_addr = XE_VRAM_ADDR_INVALID; int err = 0, pos = 0; bool sram = dir == XE_SVM_COPY_TO_SRAM; - ktime_t start = xe_svm_stats_ktime_get(); + ktime_t start = xe_gt_stats_ktime_get(); /* * This flow is complex: it locates physically contiguous device pages, @@ -979,7 +975,7 @@ static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ struct xe_svm_range *range, \ ktime_t start) \ { \ - s64 us_delta = xe_svm_stats_ktime_us_delta(start); \ + s64 us_delta = xe_gt_stats_ktime_us_delta(start); \ \ switch (xe_svm_range_size(range)) { \ case SZ_4K: \ @@ -1024,7 +1020,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_pagemap *dpagemap; struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 3 : 1; - ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start; + ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start; int err; lockdep_assert_held_write(&vm->lock); @@ -1063,7 +1059,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { - ktime_t migrate_start = xe_svm_stats_ktime_get(); + ktime_t migrate_start = xe_gt_stats_ktime_get(); /* TODO : For multi-device dpagemap will be used to find the * remote tile and remote device. 
Will need to modify @@ -1100,7 +1096,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, } get_pages: - get_pages_start = xe_svm_stats_ktime_get(); + get_pages_start = xe_gt_stats_ktime_get(); range_debug(range, "GET PAGES"); err = xe_svm_range_get_pages(vm, range, &ctx); @@ -1127,7 +1123,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); range_debug(range, "PAGE FAULT - BIND"); - bind_start = xe_svm_stats_ktime_get(); + bind_start = xe_gt_stats_ktime_get(); xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { err = xe_vm_drm_exec_lock(vm, &exec); drm_exec_retry_on_contention(&exec); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ff74528ca0c6..c8fdcdbd6ae7 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -228,6 +228,32 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) return 0; } +/** + * xe_sync_entry_wait() - Wait on an in-sync + * @sync: Sync object + * + * If the sync is an in-sync (input fence) that has not signaled, wait for it to signal. + * + * Return: 0 on success, -ERESTARTSYS if the wait was interrupted + */ +int xe_sync_entry_wait(struct xe_sync_entry *sync) +{ + return xe_sync_needs_wait(sync) ? + dma_fence_wait(sync->fence, true) : 0; +} + +/** + * xe_sync_needs_wait() - Sync needs a wait (input dma-fence not signaled) + * @sync: Sync object + * + * Return: True if sync needs a wait, False otherwise + */ +bool xe_sync_needs_wait(struct xe_sync_entry *sync) +{ + return sync->fence && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &sync->fence->flags); +} + void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) { if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)) @@ -311,8 +337,11 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct xe_tile *tile; u8 id; - for_each_tile(tile, vm->xe, id) - num_fence += (1 + XE_MAX_GT_PER_TILE); + for_each_tile(tile, vm->xe, id) { + num_fence++; + for_each_tlb_inval(i) + num_fence++; + } fences = kmalloc_array(num_fence, sizeof(*fences), GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 51f2d803e977..6b949194acff 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -29,6 +29,8 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence); +int xe_sync_entry_wait(struct xe_sync_entry *sync); +bool xe_sync_needs_wait(struct xe_sync_entry *sync); void xe_sync_entry_cleanup(struct xe_sync_entry *sync); struct dma_fence * xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 4f4f9a5c43af..63c060c2ea5c 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -209,6 +209,11 @@ int xe_tile_init(struct xe_tile *tile) if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); + /* Optimistically anticipate at most 256 TLB fences with PRL */ + tile->mem.reclaim_pool = xe_sa_bo_manager_init(tile, SZ_1M, XE_PAGE_RECLAIM_LIST_MAX_SIZE); + if (IS_ERR(tile->mem.reclaim_pool)) + return PTR_ERR(tile->mem.reclaim_pool); + return 0; } void xe_tile_migrate_wait(struct xe_tile *tile) diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c index fff242a5ae56..5df2f461b7b7 100644 --- 
a/drivers/gpu/drm/xe/xe_tile_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -82,13 +82,9 @@ int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct xe_tile *tile = node_to_tile(node); struct xe_device *xe = tile_to_xe(tile); - int ret; - xe_pm_runtime_get(xe); - ret = xe_tile_debugfs_simple_show(m, data); - xe_pm_runtime_put(xe); - - return ret; + guard(xe_pm_runtime)(xe); + return xe_tile_debugfs_simple_show(m, data); } static int ggtt(struct xe_tile *tile, struct drm_printer *p) @@ -110,6 +106,13 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "sa_info", .show = xe_tile_debugfs_show_with_rpm, .data = sa_info }, }; +static void tile_debugfs_create_vram_mm(struct xe_tile *tile) +{ + if (tile->mem.vram) + ttm_resource_manager_create_debugfs(&tile->mem.vram->ttm.manager, tile->debugfs, + "vram_mm"); +} + /** * xe_tile_debugfs_register - Register tile's debugfs attributes * @tile: the &xe_tile to register @@ -139,4 +142,6 @@ void xe_tile_debugfs_register(struct xe_tile *tile) drm_debugfs_create_files(vf_safe_debugfs_list, ARRAY_SIZE(vf_safe_debugfs_list), tile->debugfs, minor); + + tile_debugfs_create_vram_mm(tile); } diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c index f3f478f14ff5..7f97db2f89bb 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c @@ -141,12 +141,11 @@ static int NAME##_set(void *data, u64 val) \ if (val > (TYPE)~0ull) \ return -EOVERFLOW; \ \ - xe_pm_runtime_get(xe); \ + guard(xe_pm_runtime)(xe); \ err = xe_sriov_pf_wait_ready(xe) ?: \ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ if (!err) \ xe_sriov_pf_provision_set_custom_mode(xe); \ - xe_pm_runtime_put(xe); \ \ return err; \ } \ diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index 918a59e686ea..dec042248164 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -199,6 +199,20 @@ void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval) mutex_unlock(&tlb_inval->seqno_lock); } +/** + * xe_tlb_inval_reset_timeout() - Reset TLB inval fence timeout + * @tlb_inval: TLB invalidation client + * + * Reset the TLB invalidation timeout timer. + */ +static void xe_tlb_inval_reset_timeout(struct xe_tlb_inval *tlb_inval) +{ + lockdep_assert_held(&tlb_inval->pending_lock); + + mod_delayed_work(system_wq, &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); +} + static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno) { int seqno_recv = READ_ONCE(tlb_inval->seqno_recv); @@ -299,6 +313,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) * @start: start address * @end: end address * @asid: address space id + * @prl_sa: suballocation of page reclaim list if used, NULL indicates PPC flush * * Issue a range based TLB invalidation if supported, if not fallback to a full * TLB invalidation. 
Completion of TLB is asynchronous and caller can use @@ -308,10 +323,10 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) */ int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, struct xe_tlb_inval_fence *fence, u64 start, u64 end, - u32 asid) + u32 asid, struct drm_suballoc *prl_sa) { return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, - start, end, asid); + start, end, asid, prl_sa); } /** @@ -327,7 +342,7 @@ void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm) u64 range = 1ull << vm->xe->info.va_bits; xe_tlb_inval_fence_init(tlb_inval, &fence, true); - xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL); xe_tlb_inval_fence_wait(&fence); } @@ -360,6 +375,12 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno) * process_g2h_msg(). */ spin_lock_irqsave(&tlb_inval->pending_lock, flags); + if (seqno == TLB_INVALIDATION_SEQNO_INVALID) { + xe_tlb_inval_reset_timeout(tlb_inval); + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); + return; + } + if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) { spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); return; diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h index 05614915463a..858d0690f995 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -23,7 +23,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, struct xe_tlb_inval_fence *fence, - u64 start, u64 end, u32 asid); + u64 start, u64 end, u32 asid, struct drm_suballoc *prl_sa); void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, struct xe_tlb_inval_fence *fence, diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c index 1ae0dec2cf31..6a7bd6315797 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -7,7 +7,9 @@ #include "xe_dep_job_types.h" #include "xe_dep_scheduler.h" #include "xe_exec_queue.h" +#include "xe_gt_printk.h" #include "xe_gt_types.h" +#include "xe_page_reclaim.h" #include "xe_tlb_inval.h" #include "xe_tlb_inval_job.h" #include "xe_migrate.h" @@ -24,6 +26,8 @@ struct xe_tlb_inval_job { struct xe_exec_queue *q; /** @vm: VM which TLB invalidation is being issued for */ struct xe_vm *vm; + /** @prl: Embedded copy of page reclaim list */ + struct xe_page_reclaim_list prl; /** @refcount: ref count of this job */ struct kref refcount; /** @@ -47,9 +51,16 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) container_of(dep_job, typeof(*job), dep); struct xe_tlb_inval_fence *ifence = container_of(job->fence, typeof(*ifence), base); + struct drm_suballoc *prl_sa = NULL; + + if (xe_page_reclaim_list_valid(&job->prl)) { + prl_sa = xe_page_reclaim_create_prl_bo(job->tlb_inval, &job->prl, ifence); + if (IS_ERR(prl_sa)) + prl_sa = NULL; /* Indicate fall back PPC flush with NULL */ + } xe_tlb_inval_range(job->tlb_inval, ifence, job->start, - job->end, job->vm->usm.asid); + job->end, job->vm->usm.asid, prl_sa); return job->fence; } @@ -107,6 +118,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, job->start = start; job->end = end; job->fence_armed = false; + xe_page_reclaim_list_init(&job->prl); job->dep.ops = &dep_job_ops; job->type = type; kref_init(&job->refcount); @@ -140,6 +152,25 @@ 
xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, return ERR_PTR(err); } +/** + * xe_tlb_inval_job_add_page_reclaim() - Embed PRL into a TLB job + * @job: TLB invalidation job that may trigger reclamation + * @prl: Page reclaim list populated during unbind + * + * Copies @prl into the job and takes an extra reference to the entry page so + * ownership can transfer to the TLB fence when the job is pushed. + */ +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job, + struct xe_page_reclaim_list *prl) +{ + struct xe_device *xe = gt_to_xe(job->q->gt); + + xe_gt_WARN_ON(job->q->gt, !xe->info.has_page_reclaim_hw_assist); + job->prl = *prl; + /* Pair with put in job_destroy */ + xe_page_reclaim_entries_get(job->prl.entries); +} + static void xe_tlb_inval_job_destroy(struct kref *ref) { struct xe_tlb_inval_job *job = container_of(ref, typeof(*job), @@ -150,6 +181,9 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) struct xe_device *xe = gt_to_xe(q->gt); struct xe_vm *vm = job->vm; + /* BO creation retains a copy (if used), so no longer needed */ + xe_page_reclaim_entries_put(job->prl.entries); + if (!job->fence_armed) kfree(ifence); else diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h index 4d6df1a6c6ca..03d6e21cd611 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -12,6 +12,7 @@ struct dma_fence; struct xe_dep_scheduler; struct xe_exec_queue; struct xe_migrate; +struct xe_page_reclaim_list; struct xe_tlb_inval; struct xe_tlb_inval_job; struct xe_vm; @@ -21,6 +22,9 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, struct xe_dep_scheduler *dep_scheduler, struct xe_vm *vm, u64 start, u64 end, int type); +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job, + struct xe_page_reclaim_list *prl); + int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h index 8f8b060e9005..48d1503e8460 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -9,6 +9,7 @@ #include #include +struct drm_suballoc; struct xe_tlb_inval; /** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ @@ -40,12 +41,13 @@ struct xe_tlb_inval_ops { * @start: Start address * @end: End address * @asid: Address space ID + * @prl_sa: Suballocation for page reclaim list * * Return 0 on success, -ECANCELED if backend is mid-reset, error on * failure */ int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, - u64 end, u32 asid); + u64 end, u32 asid, struct drm_suballoc *prl_sa); /** * @initialized: Backend is initialized @@ -80,6 +82,7 @@ struct xe_tlb_inval { const struct xe_tlb_inval_ops *ops; /** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */ #define TLB_INVALIDATION_SEQNO_MAX 0x100000 +#define TLB_INVALIDATION_SEQNO_INVALID TLB_INVALIDATION_SEQNO_MAX int seqno; /** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */ struct mutex seqno_lock; diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 79a97b086cb2..6d12fcc13f43 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -13,6 +13,7 @@ #include #include "xe_exec_queue_types.h" +#include "xe_exec_queue.h" #include "xe_gpu_scheduler_types.h" #include "xe_gt_types.h" #include 
"xe_guc_exec_queue_types.h" @@ -97,11 +98,51 @@ DECLARE_EVENT_CLASS(xe_exec_queue, __entry->guc_state, __entry->flags) ); +DECLARE_EVENT_CLASS(xe_exec_queue_multi_queue, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q), + + TP_STRUCT__entry( + __string(dev, __dev_name_eq(q)) + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u8, gt_id) + __field(u16, width) + __field(u32, guc_id) + __field(u32, guc_state) + __field(u32, flags) + __field(u32, primary) + ), + + TP_fast_assign( + __assign_str(dev); + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->gt_id = q->gt->info.id; + __entry->width = q->width; + __entry->guc_id = q->guc->id; + __entry->guc_state = atomic_read(&q->guc->state); + __entry->flags = q->flags; + __entry->primary = xe_exec_queue_multi_queue_primary(q)->guc->id; + ), + + TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d guc_id=%d, guc_state=0x%x, flags=0x%x, primary=%d", + __get_str(dev), __entry->class, __entry->logical_mask, + __entry->gt_id, __entry->width, __entry->guc_id, + __entry->guc_state, __entry->flags, + __entry->primary) +); + DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create, TP_PROTO(struct xe_exec_queue *q), TP_ARGS(q) ); +DEFINE_EVENT(xe_exec_queue_multi_queue, xe_exec_queue_create_multi_queue, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume, TP_PROTO(struct xe_exec_queue *q), TP_ARGS(q) @@ -172,6 +213,11 @@ DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error, TP_ARGS(q) ); +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cgp_context_error, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) +); + DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop, TP_PROTO(struct xe_exec_queue *q), TP_ARGS(q) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 465bda355443..157520ea1783 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -218,9 +218,12 @@ int xe_uc_load_hw(struct xe_uc *uc) xe_guc_engine_activity_enable_stats(&uc->guc); - /* We don't fail the driver load if HuC fails to auth, but let's warn */ + /* We don't fail the driver load if HuC fails to auth */ ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC); - xe_gt_assert(uc_to_gt(uc), !ret); + if (ret) + xe_gt_err(uc_to_gt(uc), + "HuC authentication failed (%pe), continuing with no HuC\n", + ERR_PTR(ret)); /* GSC load is async */ xe_gsc_load_start(&uc->gsc); @@ -301,6 +304,34 @@ int xe_uc_suspend(struct xe_uc *uc) return xe_guc_suspend(&uc->guc); } +/** + * xe_uc_runtime_suspend() - UC runtime suspend + * @uc: the UC object + * + * Runtime suspend all UCs. + */ +void xe_uc_runtime_suspend(struct xe_uc *uc) +{ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return; + + xe_guc_runtime_suspend(&uc->guc); +} + +/** + * xe_uc_runtime_resume() - UC runtime resume + * @uc: the UC object + * + * Runtime resume all UCs. 
+ */ +void xe_uc_runtime_resume(struct xe_uc *uc) +{ + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return; + + xe_guc_runtime_resume(&uc->guc); +} + /** * xe_uc_declare_wedged() - Declare UC wedged * @uc: the UC object diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 21c9306098cf..5398da1a8097 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -14,6 +14,8 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); +void xe_uc_runtime_resume(struct xe_uc *uc); +void xe_uc_runtime_suspend(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 622b76078567..dcb4a32e7a64 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,11 +115,11 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ - fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 54, 0)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 54, 0)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 53, 0)) \ + fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 53, 0)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 53, 0)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 79ab6c512d3e..95e22ff95ea8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1509,9 +1509,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_LIST_HEAD(&vm->preempt.exec_queues); if (flags & XE_VM_FLAG_FAULT_MODE) - vm->preempt.min_run_period_ms = 0; + vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms; else - vm->preempt.min_run_period_ms = 5; + vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); @@ -2236,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; struct drm_gpuvm_bo *vm_bo; + u64 range_start = addr; u64 range_end = addr + range; int err; @@ -2248,10 +2249,16 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, switch (operation) { case DRM_XE_VM_BIND_OP_MAP: + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) { + xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end); + vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; + } + + fallthrough; case DRM_XE_VM_BIND_OP_MAP_USERPTR: { struct drm_gpuvm_map_req map_req = { - .map.va.addr = addr, - .map.va.range = range, + .map.va.addr = range_start, + .map.va.range = range_end - range_start, .map.gem.obj = obj, .map.gem.offset = bo_offset_or_userptr, }; @@ -2451,8 +2458,17 @@ static struct 
xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, if (IS_ERR(vma)) return vma; - if (xe_vma_is_userptr(vma)) + if (xe_vma_is_userptr(vma)) { err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + /* + * -EBUSY has dedicated meaning that a user fence + * attached to the VMA is busy, in practice + * xe_vma_userptr_pin_pages can only fail with -EBUSY if + * we are low on memory so convert this to -ENOMEM. + */ + if (err == -EBUSY) + err = -ENOMEM; + } } if (err) { prep_vma_destroy(vm, vma, false); @@ -2727,7 +2743,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, if (xe_vma_is_cpu_addr_mirror(vma) && xe_svm_has_mapping(vm, xe_vma_start(vma), - xe_vma_end(vma))) + xe_vma_end(vma)) && + !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP)) return -EBUSY; if (!xe_vma_is_cpu_addr_mirror(vma)) @@ -3107,19 +3124,19 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, n_fence = 0, err; + int number_tiles = 0, current_fence = 0, n_fence = 0, err, i; u8 id; number_tiles = vm_ops_setup_tile_args(vm, vops); if (number_tiles == 0) return ERR_PTR(-ENODATA); - if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { - for_each_tile(tile, vm->xe, id) - ++n_fence; - } else { - for_each_tile(tile, vm->xe, id) - n_fence += (1 + XE_MAX_GT_PER_TILE); + for_each_tile(tile, vm->xe, id) { + ++n_fence; + + if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) + for_each_tlb_inval(i) + ++n_fence; } fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); @@ -3149,7 +3166,6 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; - int i; fence = NULL; if (!vops->pt_update_ops[id].num_ops) @@ -3214,7 +3230,8 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, { switch (op->base.op) { case DRM_GPUVA_OP_MAP: - vma_add_ufence(op->map.vma, ufence); + if (!xe_vma_is_cpu_addr_mirror(op->map.vma)) + vma_add_ufence(op->map.vma, ufence); break; case DRM_GPUVA_OP_REMAP: if (op->remap.prev) @@ -3490,6 +3507,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; + if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) && + xe_pat_index_get_comp_en(xe, pat_index))) + return -EINVAL; + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > xe_bo_size(bo) - range)) { @@ -3913,7 +3934,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, &fence[fence_id], start, end, - vm->usm.asid); + vm->usm.asid, NULL); if (err) goto wait; ++fence_id; @@ -3926,7 +3947,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, &fence[fence_id], start, end, - vm->usm.asid); + vm->usm.asid, NULL); if (err) goto wait; ++fence_id; @@ -4032,10 +4053,18 @@ int xe_vm_validate_protected(struct xe_vm *vm) } struct xe_vm_snapshot { + int uapi_flags; unsigned long num_snaps; struct { u64 ofs, bo_ofs; unsigned long len; +#define XE_VM_SNAP_FLAG_USERPTR BIT(0) +#define XE_VM_SNAP_FLAG_READ_ONLY BIT(1) +#define XE_VM_SNAP_FLAG_IS_NULL BIT(2) + unsigned long flags; + int uapi_mem_region; + int pat_index; + int cpu_caching; struct xe_bo *bo; void *data; struct mm_struct *mm; @@ -4064,6 +4093,13 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) 
goto out_unlock; } + if (vm->flags & XE_VM_FLAG_FAULT_MODE) + snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE; + if (vm->flags & XE_VM_FLAG_LR_MODE) + snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE; + if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE) + snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; + snap->num_snaps = num_snaps; i = 0; drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { @@ -4076,9 +4112,25 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) snap->snap[i].ofs = xe_vma_start(vma); snap->snap[i].len = xe_vma_size(vma); + snap->snap[i].flags = xe_vma_read_only(vma) ? + XE_VM_SNAP_FLAG_READ_ONLY : 0; + snap->snap[i].pat_index = vma->attr.pat_index; if (bo) { + snap->snap[i].cpu_caching = bo->cpu_caching; snap->snap[i].bo = xe_bo_get(bo); snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); + switch (bo->ttm.resource->mem_type) { + case XE_PL_SYSTEM: + case XE_PL_TT: + snap->snap[i].uapi_mem_region = 0; + break; + case XE_PL_VRAM0: + snap->snap[i].uapi_mem_region = 1; + break; + case XE_PL_VRAM1: + snap->snap[i].uapi_mem_region = 2; + break; + } } else if (xe_vma_is_userptr(vma)) { struct mm_struct *mm = to_userptr_vma(vma)->userptr.notifier.mm; @@ -4089,8 +4141,14 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) snap->snap[i].data = ERR_PTR(-EFAULT); snap->snap[i].bo_ofs = xe_vma_userptr(vma); + snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR; + snap->snap[i].uapi_mem_region = 0; + } else if (xe_vma_is_null(vma)) { + snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL; + snap->snap[i].uapi_mem_region = -1; } else { snap->snap[i].data = ERR_PTR(-ENOENT); + snap->snap[i].uapi_mem_region = -1; } i++; } @@ -4109,7 +4167,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) struct xe_bo *bo = snap->snap[i].bo; int err; - if (IS_ERR(snap->snap[i].data)) + if (IS_ERR(snap->snap[i].data) || + snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL) continue; snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); @@ -4155,15 +4214,32 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) return; } + drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags); for (i = 0; i < snap->num_snaps; i++) { drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); + drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n", + snap->snap[i].ofs, + snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ? + "read_only" : "read_write", + snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ? + "null_sparse" : + snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ? + "userptr" : "bo", + snap->snap[i].uapi_mem_region == -1 ? 
0 : + BIT(snap->snap[i].uapi_mem_region), + snap->snap[i].pat_index, + snap->snap[i].cpu_caching); + if (IS_ERR(snap->snap[i].data)) { drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, PTR_ERR(snap->snap[i].data)); continue; } + if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL) + continue; + drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { @@ -4317,6 +4393,8 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, if (is_madvise) vops.flags |= XE_VMA_OPS_FLAG_MADVISE; + else + vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP; err = vm_bind_ioctl_ops_parse(vm, ops, &vops); if (err) @@ -4390,6 +4468,46 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) return xe_vm_alloc_vma(vm, &map_req, true); } +static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma) +{ + return vma && xe_vma_is_cpu_addr_mirror(vma) && + xe_vma_has_default_mem_attrs(vma); +} + +/** + * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs + * @vm: VM to search within + * @start: Input/output pointer to the starting address of the range + * @end: Input/output pointer to the end address of the range + * + * Given a range defined by @start and @end, this function checks the VMAs + * immediately before and after the range. If those neighboring VMAs are + * CPU-address-mirrored and have default memory attributes, the function + * updates @start and @end to include them. This extended range can then + * be used for merging or other operations that require a unified VMA. + * + * The function does not perform the merge itself; it only computes the + * mergeable boundaries. + */ +void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end) +{ + struct xe_vma *prev, *next; + + lockdep_assert_held(&vm->lock); + + if (*start >= SZ_4K) { + prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K); + if (is_cpu_addr_vma_with_default_attr(prev)) + *start = xe_vma_start(prev); + } + + if (*end < vm->size) { + next = xe_vm_find_vma_by_addr(vm, *end + 1); + if (is_cpu_addr_vma_with_default_attr(next)) + *end = xe_vma_end(next); + } +} + /** * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma * @vm: Pointer to the xe_vm structure diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index ef8a5019574e..361f10b3c453 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -68,6 +68,9 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); bool xe_vma_has_default_mem_attrs(struct xe_vma *vma); +void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, + u64 *start, + u64 *end); /** * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs * @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 2168ef052499..18bad1dd08e6 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -467,6 +467,7 @@ struct xe_vma_ops { #define XE_VMA_OPS_FLAG_MADVISE BIT(1) #define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) #define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3) +#define XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP BIT(4) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index d50baefcd124..1b9e9b028975 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -156,12 +156,11 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct 
xe_vram_region * static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; u64 offset; u32 reg; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return -ETIMEDOUT; if (GRAPHICS_VER(xe) >= 20) { @@ -193,7 +192,6 @@ static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset) offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; } - xe_force_wake_put(gt_to_fw(gt), fw_ref); *poffset = offset; return 0; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e32dd2fde6f1..a93717e77da0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -15,6 +15,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" #include "regs/xe_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" @@ -216,20 +217,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) }, - /* Xe2_LPG */ - - { XE_RTP_NAME("16020975621"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14018157293"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0), - SET(XEHPC_L3CLOS_MASK(1), ~0), - SET(XEHPC_L3CLOS_MASK(2), ~0), - SET(XEHPC_L3CLOS_MASK(3), ~0)) - }, - /* Xe2_LPM */ { XE_RTP_NAME("14017421178"), @@ -315,6 +302,10 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, }; static const struct xe_rtp_entry_sr engine_was[] = { @@ -504,11 +495,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, - { XE_RTP_NAME("14018957109"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE)) - }, { XE_RTP_NAME("14020338487"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) @@ -518,11 +504,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, - { XE_RTP_NAME("14019322943"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE)) - }, { XE_RTP_NAME("14018471104"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) @@ -693,7 +674,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + XE_RTP_RULES(GRAPHICS_VERSION(3000), FUNC(xe_rtp_match_first_render_or_compute), FUNC(xe_rtp_match_not_sriov_vf), 
FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), @@ -799,17 +780,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("16020518922"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(FF_MODE, - DIS_TE_AUTOSTRIP | - DIS_MESH_PARTIAL_AUTOSTRIP | - DIS_MESH_AUTOSTRIP), - SET(VFLSKPD, - DIS_PARTIAL_AUTOSTRIP | - DIS_AUTOSTRIP)) - }, { XE_RTP_NAME("14019386621"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) @@ -818,20 +788,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, - { XE_RTP_NAME("14020013138"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) - }, { XE_RTP_NAME("14019988906"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, - { XE_RTP_NAME("16020183090"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) - }, { XE_RTP_NAME("18033852989"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 7ca7258eb5d8..5cd7fa6d2a5c 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -16,15 +16,11 @@ 16017236439 PLATFORM(PVC) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 14015076503 MEDIA_VERSION(1300) -16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) -14018913170 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) - MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1) - GRAPHICS_VERSION_RANGE(1270, 1274) +14018913170 GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) 14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) -14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) MEDIA_VERSION(2000) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f64dc0eff0e6..726e481574fe 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -106,6 +106,7 @@ extern "C" { #define DRM_XE_OBSERVATION 0x0b #define DRM_XE_MADVISE 0x0c #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e /* Must be kept compact -- no holes */ @@ -123,6 +124,7 @@ extern "C" { #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) #define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) +#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) /** * DOC: Xe IOCTL Extensions @@ -210,8 +212,12 @@ struct drm_xe_ext_set_property { /** @pad: MBZ */ __u32 pad; - /** @value: property value */ - __u64 value; + union { + /** @value: property value */ + __u64 value; + /** @ptr: pointer to user value */ + __u64 ptr; + }; /** @reserved: Reserved */ __u64 reserved[2]; @@ -403,6 
+409,9 @@ struct drm_xe_query_mem_regions { * has low latency hint support * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the * device has CPU address mirroring support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the + * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION. + * This is exposed only on Xe2+. * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -421,6 +430,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -791,6 +801,17 @@ struct drm_xe_device_query { * need to use VRAM for display surfaces, therefore the kernel requires * setting this flag for such objects, otherwise an error is thrown on * small-bar systems. + * - %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION - Allows userspace to + * hint that compression (CCS) should be disabled for the buffer being + * created. This can avoid unnecessary memory operations and CCS state + * management. + * On pre-Xe2 platforms, this flag is currently rejected as compression + * control is not supported via PAT index. On Xe2+ platforms, compression + * is controlled via PAT entries. If this flag is set, the driver will reject + * any VM bind that requests a PAT index enabling compression for this BO. + * Note: On dGPU platforms, there is currently no change in behavior with + * this flag, but future improvements may leverage it. The current benefit is + * primarily applicable to iGPU platforms. * * @cpu_caching supports the following values: * - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back @@ -837,6 +858,7 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) #define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) +#define DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION (1 << 3) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed @@ -1252,6 +1274,20 @@ struct drm_xe_vm_bind { * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP - Create a multi-queue group + * or add secondary queues to a multi-queue group. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_CREATE flag set, + * then a new multi-queue group is created with this queue as the primary queue + * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary + * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag + * set, then the multi-queue group is kept active after the primary queue is + * destroyed. + * All the other non-relevant bits of extension's 'value' field while adding the + * primary or the secondary queues of the group must be set to 0. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue + * priority within the multi-queue group. 
Current valid priority values are 0–2 + * (default is 1), with higher values indicating higher priority. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1292,6 +1328,11 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 +#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 +#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -1655,6 +1696,9 @@ enum drm_xe_oa_unit_type { /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ DRM_XE_OA_UNIT_TYPE_OAM_SAG, + + /** @DRM_XE_OA_UNIT_TYPE_MERT: MERT OA unit */ + DRM_XE_OA_UNIT_TYPE_MERT, }; /** @@ -1677,12 +1721,19 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) #define DRM_XE_OA_CAPS_OAM (1 << 4) +#define DRM_XE_OA_CAPS_OA_UNIT_GT_ID (1 << 5) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; + /** @gt_id: gt id for this OA unit */ + __u16 gt_id; + + /** @reserved1: MBZ */ + __u16 reserved1[3]; + /** @reserved: MBZ */ - __u64 reserved[4]; + __u64 reserved[3]; /** @num_engines: number of engines in @eci array */ __u64 num_engines; @@ -2274,6 +2325,30 @@ struct drm_xe_vm_query_mem_range_attr { }; +/** + * struct drm_xe_exec_queue_set_property - exec queue set property + * + * Sets execution queue properties dynamically. + * Currently only %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY + * property can be dynamically set. + */ +struct drm_xe_exec_queue_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + #if defined(__cplusplus) } #endif
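
A rough userspace sketch of how the multi-queue group uapi documented above might be exercised is shown below. It is illustrative only and not part of this series: it assumes an already-open xe DRM fd (fd), a valid vm_id and a populated struct drm_xe_engine_class_instance (eci), the helper names are made up, and error handling is trimmed.

/*
 * Illustrative only: create a multi-queue group (Q0 primary, Q1 secondary)
 * and later raise Q1's priority inside the group. fd/vm_id/eci are assumed
 * to be set up elsewhere; checks are omitted for brevity.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include "xe_drm.h"	/* the uapi header modified above */

static uint32_t create_group_queue(int fd, uint32_t vm_id,
				   struct drm_xe_engine_class_instance *eci,
				   uint64_t group_value)
{
	/* Property extension chained into exec queue create, as documented above */
	struct drm_xe_ext_set_property ext = {
		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
		.value = group_value,
	};
	struct drm_xe_exec_queue_create create = {
		.extensions = (uintptr_t)&ext,
		.width = 1,
		.num_placements = 1,
		.vm_id = vm_id,
		.instances = (uintptr_t)eci,
	};

	if (ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		return 0;	/* sketch: treat 0 as "failed" */

	return create.exec_queue_id;
}

static void multi_queue_group_example(int fd, uint32_t vm_id,
				      struct drm_xe_engine_class_instance *eci)
{
	/* Q0 creates the group and becomes its primary queue */
	uint32_t q0 = create_group_queue(fd, vm_id, eci, DRM_XE_MULTI_GROUP_CREATE);
	/* Q1 joins the group; the primary's exec_queue_id goes in the low 32 bits */
	uint32_t q1 = create_group_queue(fd, vm_id, eci, q0);

	/* Dynamically bump Q1's priority within the group (valid range 0-2) */
	struct drm_xe_exec_queue_set_property prop = {
		.exec_queue_id = q1,
		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY,
		.value = 2,
	};
	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY, &prop);
}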