Merge tag 'amd-drm-next-6.16-2025-05-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.16-2025-05-09:

amdgpu:
- IPS fixes
- DSC cleanup
- DC Scaling updates
- DC FP fixes
- Fused I2C-over-AUX updates
- SubVP fixes
- Freesync fix
- DMUB AUX fixes
- VCN fix
- Hibernation fixes
- HDP fixes
- DCN 2.1 fixes
- DPIA fixes
- DMUB updates
- Use drm_file_err in amdgpu
- Enforce isolation updates
- Use new dma_fence helpers
- USERQ fixes
- Documentation updates
- Misc code cleanups
- SR-IOV updates
- RAS updates
- PSP 12 cleanups

amdkfd:
- Update error messages for SDMA
- Userptr updates

drm:
- Add drm_file_err function

dma-buf:
- Add a helper to sort and deduplicate dma_fence arrays

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250509230951.3871914-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Documentation/gpu/amdgpu/amd-hardware-list-info.rst (new file, 23 lines)
@@ -0,0 +1,23 @@
=================================================
 AMD Hardware Components Information per Product
=================================================

On this page, you can find the AMD product name and which component version is
part of it.

Accelerated Processing Units (APU) Info
---------------------------------------

.. csv-table::
   :header-rows: 1
   :widths: 3, 2, 2, 1, 1, 1, 1
   :file: ./apu-asic-info-table.csv

Discrete GPU Info
-----------------

.. csv-table::
   :header-rows: 1
   :widths: 3, 2, 2, 1, 1, 1
   :file: ./dgpu-asic-info-table.csv
@@ -12,18 +12,39 @@ we have a dedicated glossary for Display Core at
      The number of CUs that are active on the system. The number of active
      CUs may be less than SE * SH * CU depending on the board configuration.

    BACO
      Bus Alive, Chip Off
    BOCO
      Bus Off, Chip Off
    CE
      Constant Engine
    CIK
      Sea Islands
    CB
      Color Buffer
    CP
      Command Processor
    CPLIB
      Content Protection Library
    CS
      Command Submission
    CSB
      Clear State Indirect Buffer
    CU
      Compute Unit
    DB
      Depth Buffer
    DFS
      Digital Frequency Synthesizer

@@ -33,6 +54,9 @@ we have a dedicated glossary for Display Core at
    EOP
      End Of Pipe/Pipeline
    FLR
      Function Level Reset
    GART
      Graphics Address Remapping Table. This is the name we use for the GPUVM
      page table used by the GPU kernel driver. It remaps system resources

@@ -45,6 +69,12 @@ we have a dedicated glossary for Display Core at
    GC
      Graphics and Compute
    GDS
      Global Data Share
    GE
      Geometry Engine
    GMC
      Graphic Memory Controller

@@ -80,6 +110,9 @@ we have a dedicated glossary for Display Core at
    KCQ
      Kernel Compute Queue
    KFD
      Kernel Fusion Driver
    KGQ
      Kernel Graphics Queue

@@ -89,6 +122,9 @@ we have a dedicated glossary for Display Core at
    MC
      Memory Controller
    MCBP
      Mid Command Buffer Preemption
    ME
      MicroEngine (Graphics)

@@ -104,6 +140,9 @@ we have a dedicated glossary for Display Core at
    MQD
      Memory Queue Descriptor
    PA
      Primitive Assembler / Physical Address
    PFP
      Pre-Fetch Parser (Graphics)

@@ -113,24 +152,39 @@ we have a dedicated glossary for Display Core at
    PSP
      Platform Security Processor
    RB
      Render Backends. Some people called it ROPs.
    RLC
      RunList Controller. This name is a remnant of past ages and doesn't have
      much meaning today. It's a group of general-purpose helper engines for
      the GFX block. It's involved in GFX power management and SR-IOV, among
      other things.
    SC
      Scan Converter
    SDMA
      System DMA
    SE
      Shader Engine
    SGPR
      Scalar General-Purpose Registers
    SH
      SHader array
    SI
      Southern Islands
    SMU/SMC
      System Management Unit / System Management Controller
    SPI
      Shader Processor Input
    SRLC
      Save/Restore List Control

@@ -143,12 +197,21 @@ we have a dedicated glossary for Display Core at
    SS
      Spread Spectrum
    SX
      Shader Export
    TA
      Trusted Application
    TC
      Texture Cache
    TOC
      Table of Contents
    UMSCH
      User Mode Scheduler
    UVD
      Unified Video Decoder

@@ -158,5 +221,17 @@ we have a dedicated glossary for Display Core at
    VCN
      Video Codec Next
    VGPR
      Vector General-Purpose Registers
    VMID
      Virtual Memory ID
    VPE
      Video Processing Engine
    XCC
      Accelerator Core Complex
    XCP
      Accelerator Core Partition

@@ -13,3 +13,5 @@ Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0
Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4
Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1
Documentation/gpu/amdgpu/debugfs.rst (new file, 210 lines)
@@ -0,0 +1,210 @@
==============
AMDGPU DebugFS
==============

The amdgpu driver provides a number of debugfs files to aid in debugging
issues in the driver. These are usually found in
/sys/kernel/debug/dri/<num>.

DebugFS Files
=============

amdgpu_benchmark
----------------

Run benchmarks using the DMA engine the driver uses for GPU memory paging.
Write a number to the file to run the test; a usage example follows the test
list below. The results are written to the kernel log. VRAM is on-device
memory (dGPUs) or carve-out (APUs) and GTT (Graphics Translation Tables) is
system memory that is accessible by the GPU. The following tests are
available:

- 1: simple test, VRAM to GTT and GTT to VRAM
- 2: simple test, VRAM to VRAM
- 3: GTT to VRAM, buffer size sweep, powers of 2
- 4: VRAM to GTT, buffer size sweep, powers of 2
- 5: VRAM to VRAM, buffer size sweep, powers of 2
- 6: GTT to VRAM, buffer size sweep, common display sizes
- 7: VRAM to GTT, buffer size sweep, common display sizes
- 8: VRAM to VRAM, buffer size sweep, common display sizes
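A minimal user-space sketch of triggering test 3 (GTT to VRAM, power-of-two
sweep). The DRI index ``0`` is an assumption and must match your GPU; the file
is only accessible to root:

.. code-block:: c

   #include <fcntl.h>
   #include <unistd.h>

   int main(void)
   {
           /* Hypothetical path: replace 0 with the DRI index of your GPU. */
           int fd = open("/sys/kernel/debug/dri/0/amdgpu_benchmark", O_WRONLY);

           if (fd < 0)
                   return 1;
           /* Writing the test number starts the benchmark; results land in dmesg. */
           write(fd, "3", 1);
           close(fd);
           return 0;
   }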
amdgpu_test_ib
--------------

Read this file to run simple IB (Indirect Buffer) tests on all kernel managed
rings. IBs are command buffers usually generated by userspace applications
which are submitted to the kernel for execution on a particular GPU engine.
This just runs the simple IB tests included in the kernel. These tests
are engine specific and verify that IB submission works.

amdgpu_discovery
----------------

Provides raw access to the IP discovery binary provided by the GPU. Read this
file to access the raw binary. This is useful for verifying the contents of
the IP discovery table. It is chip specific.

amdgpu_vbios
------------

Provides raw access to the ROM binary image from the GPU. Read this file to
access the raw binary. This is useful for verifying the contents of the
video BIOS ROM. It is board specific.

amdgpu_evict_gtt
----------------

Evict all buffers from the GTT memory pool. Read this file to evict all
buffers from this pool.

amdgpu_evict_vram
-----------------

Evict all buffers from the VRAM memory pool. Read this file to evict all
buffers from this pool.

amdgpu_gpu_recover
------------------

Trigger a GPU reset. Read this file to trigger a reset of the entire GPU.
All work currently running on the GPU will be lost.

amdgpu_ring_<name>
------------------

Provides read access to the kernel managed ring buffers for each ring <name>.
These are useful for debugging problems on a particular ring. The ring buffer
is how the CPU sends commands to the GPU. The CPU writes commands into the
buffer and then asks the GPU engine to process it. This is the raw binary
contents of the ring buffer. Use a tool like UMR to decode the rings into
human readable form.

amdgpu_mqd_<name>
-----------------

Provides read access to the kernel managed MQD (Memory Queue Descriptor) for
ring <name> managed by the kernel driver. MQDs define the features of the ring
and are used to store the ring's state when it is not connected to hardware.
The driver writes the requested ring features and metadata (GPU addresses of
the ring itself and associated buffers) to the MQD, and the firmware uses the
MQD to populate the hardware when the ring is mapped to a hardware slot. Only
available on engines which use MQDs. This provides access to the raw MQD
binary.

amdgpu_error_<name>
-------------------

Provides an interface to set an error code on the dma fences associated with
ring <name>. The error code specified is propagated to all fences associated
with the ring. Use this to inject a fence error into a ring.
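A hedged sketch of injecting a fence error into a ring. Both the ring name
``sdma0`` and the accepted value format (a negative errno written as decimal
text) are assumptions here, not guaranteed by the interface description above:

.. code-block:: c

   #include <stdio.h>

   int main(void)
   {
           /* Hypothetical path and ring name; adjust for your system. */
           FILE *f = fopen("/sys/kernel/debug/dri/0/amdgpu_error_sdma0", "w");

           if (!f)
                   return 1;
           /* Propagate -EIO (-5) to all fences associated with the ring. */
           fprintf(f, "%d", -5);
           fclose(f);
           return 0;
   }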
amdgpu_pm_info
--------------

Provides human readable information about the power management features
and state of the GPU. This includes current GFX clock, Memory clock,
voltages, average SoC power, temperature, GFX load, Memory load, SMU
feature mask, VCN power state, clock and power gating features.

amdgpu_firmware_info
--------------------

Lists the firmware versions for all firmwares used by the GPU. Only
entries with a non-zero version are valid. If the version is 0, the firmware
is not valid for the GPU.

amdgpu_fence_info
-----------------

Shows the last signalled and emitted fence sequence numbers for each
kernel driver managed ring. Fences are associated with submissions
to the engine. Emitted fences have been submitted to the ring
and signalled fences have been signalled by the GPU. Rings with a
larger emitted fence value have outstanding work that is still being
processed by the engine that owns that ring. When the emitted and
signalled fence values are equal, the ring is idle.

amdgpu_gem_info
---------------

Lists all of the PIDs using the GPU and the GPU buffers that they have
allocated. This lists the buffer size, pool (VRAM, GTT, etc.), and buffer
attributes (CPU access required, CPU cache attributes, etc.).

amdgpu_vm_info
--------------

Lists all of the PIDs using the GPU and the GPU buffers that they have
allocated as well as the status of those buffers relative to that process'
GPU virtual address space (e.g., evicted, idle, invalidated, etc.).

amdgpu_sa_info
--------------

Prints out all of the suballocations (sa) by the suballocation manager in the
kernel driver. Prints the GPU address, size, and fence info associated
with each suballocation. The suballocations are used internally within
the kernel driver for various things.

amdgpu_<pool>_mm
----------------

Prints TTM information about the memory pool <pool>.

amdgpu_vram
-----------

Provides direct access to VRAM. Used by tools like UMR to inspect
objects in VRAM.

amdgpu_iomem
------------

Provides direct access to GTT memory. Used by tools like UMR to inspect
GTT memory.

amdgpu_regs_*
-------------

Provides direct access to various register apertures on the GPU. Used
by tools like UMR to access GPU registers.

amdgpu_regs2
------------

Provides an IOCTL interface used by UMR for interacting with GPU registers.

amdgpu_sensors
--------------

Provides an interface to query GPU power metrics (temperature, average
power, etc.). Used by tools like UMR to query GPU power metrics.

amdgpu_gca_config
-----------------

Provides an interface to query GPU details (Graphics/Compute Array config,
PCI config, GPU family, etc.). Used by tools like UMR to query GPU details.

amdgpu_wave
-----------

Used to query GFX/compute wave information from the hardware. Used by tools
like UMR to query GFX/compute wave information.

amdgpu_gpr
----------

Used to query GFX/compute GPR (General Purpose Register) information from the
hardware. Used by tools like UMR to query GPRs when debugging shaders.

amdgpu_gprwave
--------------

Provides an IOCTL interface used by UMR for interacting with shader waves.

amdgpu_fw_attestation
---------------------

Provides an interface for reading back firmware attestation records.
@@ -2,6 +2,13 @@
GPU Debugging
===============

General Debugging Options
=========================

The DebugFS section provides documentation on a number of files to aid in
debugging issues on the GPU.


GPUVM Debugging
===============
@@ -154,7 +154,7 @@ of the display parameters, but the userspace might also cause this issue. One
way to identify the source of the problem is to take a screenshot or make a
desktop video capture when the problem happens; after checking the
screenshot/video recording, if you don't see any of the artifacts, it means
that the issue is likely on the the driver side. If you can still see the
that the issue is likely on the driver side. If you can still see the
problem in the data collected, it is an issue that probably happened during
rendering, and the display code just got the framebuffer already corrupted.
@@ -67,36 +67,66 @@ GC (Graphics and Compute)
    This is the graphics and compute engine, i.e., the block that
    encompasses the 3D pipeline and shader blocks. This is by far the
    largest block on the GPU. The 3D pipeline has tons of sub-blocks. In
    addition to that, it also contains the CP microcontrollers (ME, PFP,
    CE, MEC) and the RLC microcontroller. It's exposed to userspace for
    user mode drivers (OpenGL, Vulkan, OpenCL, etc.)
    addition to that, it also contains the CP microcontrollers (ME, PFP, CE,
    MEC) and the RLC microcontroller. It's exposed to userspace for user mode
    drivers (OpenGL, Vulkan, OpenCL, etc.). More details in :ref:`Graphics (GFX)
    and Compute <amdgpu-gc>`.

VCN (Video Core Next)
    This is the multi-media engine. It handles video and image encode and
    decode. It's exposed to userspace for user mode drivers (VA-API,
    OpenMAX, etc.)

Graphics and Compute Microcontrollers
-------------------------------------
.. _pipes-and-queues-description:

CP (Command Processor)
    The name for the hardware block that encompasses the front end of the
    GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers
    (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers
    provides the driver interface to interact with the GFX/Compute engine.
GFX, Compute, and SDMA Overall Behavior
=======================================

MEC (MicroEngine Compute)
    This is the microcontroller that controls the compute queues on the
    GFX/compute engine.
.. note:: For simplicity, whenever the term block is used in this section, it
   means GFX, Compute, and SDMA.

MES (MicroEngine Scheduler)
    This is a new engine for managing queues. This is currently unused.
GFX, Compute and SDMA share a similar form of operation that can be abstracted
to facilitate understanding of the behavior of these blocks. See the figure
below illustrating the common components of these blocks:

RLC (RunList Controller)
    This is another microcontroller in the GFX/Compute engine. It handles
    power management related functionality within the GFX/Compute engine.
    The name is a vestige of old hardware where it was originally added
    and doesn't really have much relation to what the engine does now.
.. kernel-figure:: pipe_and_queue_abstraction.svg

In the central part of this figure, you can see two hardware elements, one called
**Pipes** and another called **Queues**; it is important to highlight that Queues
must be associated with a Pipe and vice-versa. Every specific hardware IP may have
a different number of Pipes and, in turn, a different number of Queues; for
example, GFX 11 has two Pipes and two Queues per Pipe for the GFX front end.

A Pipe is the hardware that processes the instructions available in the Queues;
in other words, it is a thread executing the operations inserted in the Queue.
One crucial characteristic of Pipes is that they can only execute one Queue at
a time; no matter if the hardware has multiple Queues in the Pipe, it only runs
one Queue per Pipe.

Pipes have the mechanics of swapping between queues at the hardware level.
Nonetheless, they only make use of Queues that are considered mapped. Pipes can
switch between queues based on any of the following inputs:

1. Command Stream;
2. Packet by Packet;
3. Other hardware requests the change (e.g., MES).

Queues within Pipes are defined by the Hardware Queue Descriptors (HQD).
Associated with the HQD concept, we have the Memory Queue Descriptor (MQD),
which is responsible for storing information about the state of each of the
available Queues in memory. The state of a Queue contains information such
as the GPU virtual address of the queue itself, save areas, doorbell, etc. The
MQD also stores the HQD registers, which are vital for activating or
deactivating a given Queue. The scheduling firmware (e.g., MES) is responsible
for loading HQDs from MQDs and vice versa.

The Queue-switching process can also happen with the firmware requesting the
preemption or unmapping of a Queue. The firmware waits for the HQD_ACTIVE bit
to change to low before saving the state into the MQD. To make a different
Queue become active, the firmware copies the MQD state into the HQD registers
and loads any additional state. Finally, it sets the HQD_ACTIVE bit to high to
indicate that the queue is active. The Pipe will then execute work from active
Queues.
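The switching sequence described above can be summarized with the following
illustrative sketch; every name in it is invented for explanation and does not
correspond to real firmware or driver code:

.. code-block:: c

   /* Conceptual model of a queue switch performed by the scheduling firmware. */
   void switch_queue(struct hqd *hqd, struct mqd *curr, struct mqd *next)
   {
           request_preemption(hqd);         /* ask the pipe to stop the current queue */
           while (read_hqd_active(hqd))     /* wait for HQD_ACTIVE to go low          */
                   cpu_relax();

           save_hqd_registers(hqd, curr);   /* HQD state -> current MQD               */
           load_hqd_registers(hqd, next);   /* next MQD  -> HQD registers             */
           set_hqd_active(hqd, true);       /* pipe now executes the new queue        */
   }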
Driver Structure
================
@@ -110,7 +140,8 @@ Some useful constructs:
KIQ (Kernel Interface Queue)
    This is a control queue used by the kernel driver to manage other gfx
    and compute queues on the GFX/compute engine. You can use it to
    map/unmap additional queues, etc.
    map/unmap additional queues, etc. This is replaced by MES on
    GFX 11 and newer hardware.

IB (Indirect Buffer)
    A command buffer for a particular engine. Rather than writing
@@ -50,23 +50,6 @@ board_info
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
   :doc: board_info

Accelerated Processing Units (APU) Info
---------------------------------------

.. csv-table::
   :header-rows: 1
   :widths: 3, 2, 2, 1, 1, 1, 1
   :file: ./apu-asic-info-table.csv

Discrete GPU Info
-----------------

.. csv-table::
   :header-rows: 1
   :widths: 3, 2, 2, 1, 1, 1
   :file: ./dgpu-asic-info-table.csv


GPU Memory Usage Information
============================
Documentation/gpu/amdgpu/gc/index.rst (new file, 52 lines)
@@ -0,0 +1,52 @@
.. _amdgpu-gc:

========================================
 drm/amdgpu - Graphics and Compute (GC)
========================================

The relationship between the CPU and GPU can be described as the
producer-consumer problem, where the CPU fills out a buffer with operations
(producer) to be executed by the GPU (consumer). The requested operations in
the buffer are called Command Packets, which can be summarized as a compressed
way of transmitting command information to the graphics controller.

The component that acts as the front end between the CPU and the GPU is called
the Command Processor (CP). This component is responsible for providing greater
flexibility to the GC since CP makes it possible to program various aspects of
the GPU pipeline. CP also coordinates the communication between the CPU and GPU
via a mechanism named **Ring Buffers**, where the CPU appends information to
the buffer while the GPU removes operations. It is relevant to highlight that a
CPU can add a pointer to the Ring Buffer that points to another region of
memory outside the Ring Buffer, and CP can handle it; this mechanism is called
**Indirect Buffer (IB)**. CP receives and parses the Command Streams (CS), and
writes the operations to the correct hardware blocks.
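The producer side of that ring-buffer handshake can be pictured with the toy
sketch below; the structure and the ``write_doorbell()`` helper are invented
for illustration and are not the driver's real ring implementation:

.. code-block:: c

   #include <stdint.h>

   #define RING_SIZE 256                /* entries, power of two */

   struct toy_ring {
           uint32_t packets[RING_SIZE];
           uint32_t wptr;               /* CPU-owned write pointer */
           uint32_t rptr;               /* GPU-owned read pointer  */
   };

   extern void write_doorbell(uint32_t wptr);   /* hypothetical helper */

   /* CPU (producer): append one packet and tell the GPU about it. */
   static void ring_emit(struct toy_ring *ring, uint32_t packet)
   {
           ring->packets[ring->wptr++ & (RING_SIZE - 1)] = packet;
           write_doorbell(ring->wptr);
   }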
Graphics (GFX) and Compute Microcontrollers
-------------------------------------------

GC is a large block, and as a result, it has multiple firmware associated with
it. Some of them are:

CP (Command Processor)
    The name for the hardware block that encompasses the front end of the
    GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers
    (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers
    provides the driver interface to interact with the GFX/Compute engine.

MEC (MicroEngine Compute)
    This is the microcontroller that controls the compute queues on the
    GFX/compute engine.

MES (MicroEngine Scheduler)
    This is the engine for managing queues. For more details check
    :ref:`MicroEngine Scheduler (MES) <amdgpu-mes>`.

RLC (RunList Controller)
    This is another microcontroller in the GFX/Compute engine. It handles
    power management related functionality within the GFX/Compute engine.
    The name is a vestige of old hardware where it was originally added
    and doesn't really have much relation to what the engine does now.

.. toctree::

   mes.rst
Documentation/gpu/amdgpu/gc/mes.rst (new file, 38 lines)
@@ -0,0 +1,38 @@
.. _amdgpu-mes:

=============================
 MicroEngine Scheduler (MES)
=============================

.. note::
   Queue and ring buffer are used as synonyms.

.. note::
   This section assumes that you are familiar with the concept of Pipes, Queues, and GC.
   If not, check :ref:`GFX, Compute, and SDMA Overall Behavior<pipes-and-queues-description>`
   and :ref:`drm/amdgpu - Graphics and Compute (GC) <amdgpu-gc>`.

Every GFX has a pipe component with one or more hardware queues. Pipes can
switch between queues depending on certain conditions, and one of the
components that can request a queue switch to a pipe is the MicroEngine
Scheduler (MES). Whenever the driver is initialized, it creates one MQD per
hardware queue, and then the MQDs are handed to the MES firmware for mapping
to:

1. Kernel Queues (legacy): These queues are statically mapped to HQDs and never
preempted. Even though this is a legacy feature, it is the current default, and
most existing hardware supports it. When an application submits work to the
kernel driver, it submits all of the application command buffers to the kernel
queues. The CS IOCTL takes the command buffers from the application and
schedules them on the kernel queue.

2. User Queues: These queues are dynamically mapped to the HQDs. Regarding the
utilization of User Queues, the userspace application will create its user
queues and submit work directly to its user queues with no need to IOCTL for
each submission and no need to share a single kernel queue.

In terms of User Queues, MES can dynamically map them to the HQDs. If there are
more MQDs than HQDs, the MES firmware will preempt other user queues to make
sure each queue gets a time slice; in other words, MES is a microcontroller
that handles the mapping and unmapping of MQDs into HQDs, as well as the
priorities and oversubscription of MQDs.
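A toy model of that oversubscription behavior is sketched below; it is purely
illustrative, with invented types and helpers, and says nothing about how the
real MES firmware is implemented:

.. code-block:: c

   #define NUM_HQD 4                    /* hardware slots available */

   struct mqd;
   struct hqd;
   /* Invented helpers for the sketch. */
   struct mqd *current_mqd(struct hqd *slot);
   void map_queue(struct hqd *slot, struct mqd *q);
   void unmap_queue(struct hqd *slot, struct mqd *q);

   /* Give each of the num_mqd software queues a turn on some HQD slot. */
   static void schedule_round(struct mqd *mqds[], int num_mqd, struct hqd *hqds[])
   {
           static int next;             /* rotates when MQDs outnumber HQDs */
           int slot;

           for (slot = 0; slot < NUM_HQD; slot++) {
                   struct mqd *victim = current_mqd(hqds[slot]);

                   if (victim)
                           unmap_queue(hqds[slot], victim);   /* save state to its MQD */
                   map_queue(hqds[slot], mqds[(next + slot) % num_mqd]);
           }
           next = (next + NUM_HQD) % num_mqd;
   }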
@@ -7,8 +7,10 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.

.. toctree::

   module-parameters
   driver-core
   amd-hardware-list-info
   module-parameters
   gc/index
   display/index
   flashing
   xgmi
@@ -16,5 +18,6 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
   thermal
   driver-misc
   debugging
   debugfs
   process-isolation
   amdgpu-glossary
Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg (new file, 1279 lines, 61 KiB; diff suppressed because it is too large)
@@ -79,6 +79,41 @@ static int fence_cmp(const void *_a, const void *_b)
        return 0;
}

/**
 * dma_fence_dedup_array - Sort and deduplicate an array of dma_fence pointers
 * @fences: Array of dma_fence pointers to be deduplicated
 * @num_fences: Number of entries in the @fences array
 *
 * Sorts the input array by context, then removes duplicate
 * fences with the same context, keeping only the most recent one.
 *
 * The array is modified in-place and unreferenced duplicate fences are released
 * via dma_fence_put(). The function returns the new number of fences after
 * deduplication.
 *
 * Return: Number of unique fences remaining in the array.
 */
int dma_fence_dedup_array(struct dma_fence **fences, int num_fences)
{
        int i, j;

        sort(fences, num_fences, sizeof(*fences), fence_cmp, NULL);

        /*
         * Only keep the most recent fence for each context.
         */
        j = 0;
        for (i = 1; i < num_fences; i++) {
                if (fences[i]->context == fences[j]->context)
                        dma_fence_put(fences[i]);
                else
                        fences[++j] = fences[i];
        }

        return ++j;
}
EXPORT_SYMBOL_GPL(dma_fence_dedup_array);
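/*
 * Editor's illustrative sketch (not part of the diff): how a caller that owns
 * a reference on each entry might use dma_fence_dedup_array() to collapse the
 * array before consuming it. The surrounding function name is hypothetical.
 */
static void example_collect_and_dedup(struct dma_fence **fences, int count)
{
        /* Sort by context and drop the older duplicates (references released). */
        count = dma_fence_dedup_array(fences, count);

        /* ... wait on or merge the 'count' unique fences that remain ... */

        while (count--)
                dma_fence_put(fences[count]);
}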
/* Implementation for the dma_fence_merge() macro, don't use directly */
struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
                                           struct dma_fence **fences,
@@ -87,7 +122,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
        struct dma_fence *tmp, *unsignaled = NULL, **array;
        struct dma_fence_array *result;
        ktime_t timestamp;
        int i, j, count;
        int i, count;

        count = 0;
        timestamp = ns_to_ktime(0);
@@ -141,19 +176,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
        if (count == 0 || count == 1)
                goto return_fastpath;

        sort(array, count, sizeof(*array), fence_cmp, NULL);

        /*
         * Only keep the most recent fence for each context.
         */
        j = 0;
        for (i = 1; i < count; i++) {
                if (array[i]->context == array[j]->context)
                        dma_fence_put(array[i]);
                else
                        array[++j] = array[i];
        }
        count = ++j;
        count = dma_fence_dedup_array(array, count);

        if (count > 1) {
                result = dma_fence_array_create(count, array,
@@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
        amdgpu_fw_attestation.o amdgpu_securedisplay.o \
        amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
        amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
        amdgpu_cper.o
        amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o

amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o

@@ -174,7 +174,10 @@ amdgpu-y += \
amdgpu-y += \
        amdgpu_mes.o \
        mes_v11_0.o \
        mes_v12_0.o
        mes_v12_0.o \

# add GFX userqueue support
amdgpu-y += mes_userqueue.o

# add UVD block
amdgpu-y += \
@@ -253,6 +256,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o

# add gfx usermode queue
amdgpu-y += amdgpu_userq.o

ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
@@ -113,6 +113,8 @@
|
||||
#include "amdgpu_xcp.h"
|
||||
#include "amdgpu_seq64.h"
|
||||
#include "amdgpu_reg_state.h"
|
||||
#include "amdgpu_userq.h"
|
||||
#include "amdgpu_eviction_fence.h"
|
||||
#if defined(CONFIG_DRM_AMD_ISP)
|
||||
#include "amdgpu_isp.h"
|
||||
#endif
|
||||
@@ -228,7 +230,7 @@ extern int amdgpu_force_asic_type;
|
||||
extern int amdgpu_smartshift_bias;
|
||||
extern int amdgpu_use_xgmi_p2p;
|
||||
extern int amdgpu_mtype_local;
|
||||
extern bool enforce_isolation;
|
||||
extern int amdgpu_enforce_isolation;
|
||||
#ifdef CONFIG_HSA_AMD
|
||||
extern int sched_policy;
|
||||
extern bool debug_evictions;
|
||||
@@ -266,8 +268,10 @@ extern int amdgpu_umsch_mm_fwlog;
|
||||
|
||||
extern int amdgpu_user_partt_mode;
|
||||
extern int amdgpu_agp;
|
||||
extern int amdgpu_rebar;
|
||||
|
||||
extern int amdgpu_wbrf;
|
||||
extern int amdgpu_user_queue;
|
||||
|
||||
#define AMDGPU_VM_MAX_NUM_CTX 4096
|
||||
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
|
||||
@@ -488,7 +492,6 @@ struct amdgpu_flip_work {
|
||||
bool async;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* file private structure
|
||||
*/
|
||||
@@ -501,6 +504,11 @@ struct amdgpu_fpriv {
|
||||
struct mutex bo_list_lock;
|
||||
struct idr bo_list_handles;
|
||||
struct amdgpu_ctx_mgr ctx_mgr;
|
||||
struct amdgpu_userq_mgr userq_mgr;
|
||||
|
||||
/* Eviction fence infra */
|
||||
struct amdgpu_eviction_fence_mgr evf_mgr;
|
||||
|
||||
/** GPU partition selection */
|
||||
uint32_t xcp_id;
|
||||
};
|
||||
@@ -512,12 +520,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
|
||||
*/
|
||||
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
|
||||
|
||||
/**
|
||||
* amdgpu_wb - This struct is used for small GPU memory allocation.
|
||||
*
|
||||
* This struct is used to allocate a small amount of GPU memory that can be
|
||||
* used to shadow certain states into the memory. This is especially useful for
|
||||
* providing easy CPU access to some states without requiring register access
|
||||
* (e.g., if some block is power gated, reading register may be problematic).
|
||||
*
|
||||
* Note: the term writeback was initially used because many of the amdgpu
|
||||
* components had some level of writeback memory, and this struct initially
|
||||
* described those components.
|
||||
*/
|
||||
struct amdgpu_wb {
|
||||
|
||||
/**
|
||||
* @wb_obj:
|
||||
*
|
||||
* Buffer Object used for the writeback memory.
|
||||
*/
|
||||
struct amdgpu_bo *wb_obj;
|
||||
|
||||
/**
|
||||
* @wb:
|
||||
*
|
||||
* Pointer to the first writeback slot. In terms of CPU address
|
||||
* this value can be accessed directly by using the offset as an index.
|
||||
* For the GPU address, it is necessary to use gpu_addr and the offset.
|
||||
*/
|
||||
volatile uint32_t *wb;
|
||||
|
||||
/**
|
||||
* @gpu_addr:
|
||||
*
|
||||
* Writeback base address in the GPU.
|
||||
*/
|
||||
uint64_t gpu_addr;
|
||||
u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
|
||||
|
||||
/**
|
||||
* @num_wb:
|
||||
*
|
||||
* Number of writeback slots reserved for amdgpu.
|
||||
*/
|
||||
u32 num_wb;
|
||||
|
||||
/**
|
||||
* @used:
|
||||
*
|
||||
* Track the writeback slot already used.
|
||||
*/
|
||||
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
|
||||
|
||||
/**
|
||||
* @lock:
|
||||
*
|
||||
* Protects read and write of the used field array.
|
||||
*/
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
@@ -551,6 +609,7 @@ struct amdgpu_allowed_register_entry {
|
||||
* are reset depends on the ASIC. Notably doesn't reset IPs
|
||||
* shared with the CPU on APUs or the memory controllers (so
|
||||
* VRAM is not lost). Not available on all ASICs.
|
||||
* @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs
|
||||
* @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
|
||||
* but without powering off the PCI bus. Suitable only for
|
||||
* discrete GPUs.
|
||||
@@ -568,6 +627,7 @@ enum amd_reset_method {
|
||||
AMD_RESET_METHOD_MODE0,
|
||||
AMD_RESET_METHOD_MODE1,
|
||||
AMD_RESET_METHOD_MODE2,
|
||||
AMD_RESET_METHOD_LINK,
|
||||
AMD_RESET_METHOD_BACO,
|
||||
AMD_RESET_METHOD_PCI,
|
||||
AMD_RESET_METHOD_ON_INIT,
|
||||
@@ -821,6 +881,11 @@ struct amdgpu_mqd_prop {
|
||||
uint32_t hqd_queue_priority;
|
||||
bool allow_tunneling;
|
||||
bool hqd_active;
|
||||
uint64_t shadow_addr;
|
||||
uint64_t gds_bkup_addr;
|
||||
uint64_t csa_addr;
|
||||
uint64_t fence_address;
|
||||
bool tmz_queue;
|
||||
};
|
||||
|
||||
struct amdgpu_mqd {
|
||||
@@ -829,6 +894,12 @@ struct amdgpu_mqd {
|
||||
struct amdgpu_mqd_prop *p);
|
||||
};
|
||||
|
||||
struct amdgpu_pcie_reset_ctx {
|
||||
bool in_link_reset;
|
||||
bool occurs_dpc;
|
||||
bool audio_suspended;
|
||||
};
|
||||
|
||||
/*
|
||||
* Custom Init levels could be defined for different situations where a full
|
||||
* initialization of all hardware blocks are not expected. Sample cases are
|
||||
@@ -853,6 +924,14 @@ struct amdgpu_init_level {
|
||||
struct amdgpu_reset_domain;
|
||||
struct amdgpu_fru_info;
|
||||
|
||||
enum amdgpu_enforce_isolation_mode {
|
||||
AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
|
||||
AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
|
||||
*/
|
||||
@@ -1081,6 +1160,13 @@ struct amdgpu_device {
|
||||
bool enable_uni_mes;
|
||||
struct amdgpu_mes mes;
|
||||
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
|
||||
const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
|
||||
|
||||
/* xarray used to retrieve the user queue fence driver reference
|
||||
* in the EOP interrupt handler to signal the particular user
|
||||
* queue fence.
|
||||
*/
|
||||
struct xarray userq_xa;
|
||||
|
||||
/* df */
|
||||
struct amdgpu_df df;
|
||||
@@ -1160,6 +1246,8 @@ struct amdgpu_device {
|
||||
struct pci_saved_state *pci_state;
|
||||
pci_channel_state_t pci_channel_state;
|
||||
|
||||
struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
|
||||
|
||||
/* Track auto wait count on s_barrier settings */
|
||||
bool barrier_has_auto_waitcnt;
|
||||
|
||||
@@ -1193,10 +1281,11 @@ struct amdgpu_device {
|
||||
bool debug_enable_ras_aca;
|
||||
bool debug_exp_resets;
|
||||
bool debug_disable_gpu_ring_reset;
|
||||
bool debug_vm_userptr;
|
||||
|
||||
/* Protection for the following isolation structure */
|
||||
struct mutex enforce_isolation_mutex;
|
||||
bool enforce_isolation[MAX_XCP];
|
||||
enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
|
||||
struct amdgpu_isolation {
|
||||
void *owner;
|
||||
struct dma_fence *spearhead;
|
||||
@@ -1210,6 +1299,10 @@ struct amdgpu_device {
|
||||
* in KFD: VRAM or GTT.
|
||||
*/
|
||||
bool apu_prefer_gtt;
|
||||
|
||||
struct list_head userq_mgr_list;
|
||||
struct mutex userq_mutex;
|
||||
bool userq_halt_for_enforce_isolation;
|
||||
};
|
||||
|
||||
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
|
||||
@@ -1464,6 +1557,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
|
||||
const u32 array_size);
|
||||
|
||||
int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
|
||||
int amdgpu_device_link_reset(struct amdgpu_device *adev);
|
||||
bool amdgpu_device_supports_atpx(struct drm_device *dev);
|
||||
bool amdgpu_device_supports_px(struct drm_device *dev);
|
||||
bool amdgpu_device_supports_boco(struct drm_device *dev);
|
||||
@@ -1614,11 +1708,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap
|
||||
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
|
||||
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
|
||||
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
|
||||
void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
|
||||
#else
|
||||
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
|
||||
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
|
||||
static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
|
||||
#endif
|
||||
|
||||
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
|
||||
|
||||
@@ -120,6 +120,9 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
|
||||
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
|
||||
RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
|
||||
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
|
||||
|
||||
if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
|
||||
RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
|
||||
}
|
||||
|
||||
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
|
||||
|
||||
@@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
|
||||
#endif /* CONFIG_AMD_PMC */
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_choose_low_power_state
|
||||
*
|
||||
* @adev: amdgpu_device_pointer
|
||||
*
|
||||
* Choose the target low power state for the GPU
|
||||
*/
|
||||
void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->in_runpm)
|
||||
return;
|
||||
|
||||
if (amdgpu_acpi_is_s0ix_active(adev))
|
||||
adev->in_s0ix = true;
|
||||
else if (amdgpu_acpi_is_s3_active(adev))
|
||||
adev->in_s3 = true;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SUSPEND */
|
||||
|
||||
@@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
||||
if (ret != -EFAULT)
|
||||
return ret;
|
||||
|
||||
/* If applications unmap memory before destroying the userptr
|
||||
* from the KFD, trigger a segmentation fault in VM debug mode.
|
||||
*/
|
||||
if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
|
||||
pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
|
||||
pid_nr(process_info->pid), mem->va);
|
||||
|
||||
// Send GPU VM fault to user space
|
||||
kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
|
||||
mem->va);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -252,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
|
||||
|
||||
if (!adev->pm.fw) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_TAHITI:
|
||||
strcpy(fw_name, "radeon/tahiti_smc.bin");
|
||||
break;
|
||||
case CHIP_PITCAIRN:
|
||||
if ((adev->pdev->revision == 0x81) &&
|
||||
((adev->pdev->device == 0x6810) ||
|
||||
(adev->pdev->device == 0x6811))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "radeon/pitcairn_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_VERDE:
|
||||
if (((adev->pdev->device == 0x6820) &&
|
||||
((adev->pdev->revision == 0x81) ||
|
||||
(adev->pdev->revision == 0x83))) ||
|
||||
((adev->pdev->device == 0x6821) &&
|
||||
((adev->pdev->revision == 0x83) ||
|
||||
(adev->pdev->revision == 0x87))) ||
|
||||
((adev->pdev->revision == 0x87) &&
|
||||
((adev->pdev->device == 0x6823) ||
|
||||
(adev->pdev->device == 0x682b)))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "radeon/verde_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "radeon/verde_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_OLAND:
|
||||
if (((adev->pdev->revision == 0x81) &&
|
||||
((adev->pdev->device == 0x6600) ||
|
||||
(adev->pdev->device == 0x6604) ||
|
||||
(adev->pdev->device == 0x6605) ||
|
||||
(adev->pdev->device == 0x6610))) ||
|
||||
((adev->pdev->revision == 0x83) &&
|
||||
(adev->pdev->device == 0x6610))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "radeon/oland_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "radeon/oland_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_HAINAN:
|
||||
if (((adev->pdev->revision == 0x81) &&
|
||||
(adev->pdev->device == 0x6660)) ||
|
||||
((adev->pdev->revision == 0x83) &&
|
||||
((adev->pdev->device == 0x6660) ||
|
||||
(adev->pdev->device == 0x6663) ||
|
||||
(adev->pdev->device == 0x6665) ||
|
||||
(adev->pdev->device == 0x6667)))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "radeon/hainan_k_smc.bin");
|
||||
} else if ((adev->pdev->revision == 0xc3) &&
|
||||
(adev->pdev->device == 0x6665)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "radeon/banks_k_2_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "radeon/hainan_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_BONAIRE:
|
||||
if ((adev->pdev->revision == 0x80) ||
|
||||
(adev->pdev->revision == 0x81) ||
|
||||
(adev->pdev->device == 0x665f)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "amdgpu/bonaire_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/bonaire_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_HAWAII:
|
||||
if (adev->pdev->revision == 0x80) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "amdgpu/hawaii_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/hawaii_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_TOPAZ:
|
||||
@@ -338,76 +277,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
|
||||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
|
||||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
|
||||
} else
|
||||
strcpy(fw_name, "amdgpu/topaz_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/topaz_smc.bin");
|
||||
break;
|
||||
case CHIP_TONGA:
|
||||
if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
|
||||
((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
|
||||
} else
|
||||
strcpy(fw_name, "amdgpu/tonga_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/tonga_smc.bin");
|
||||
break;
|
||||
case CHIP_FIJI:
|
||||
strcpy(fw_name, "amdgpu/fiji_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/fiji_smc.bin");
|
||||
break;
|
||||
case CHIP_POLARIS11:
|
||||
if (type == CGS_UCODE_ID_SMU) {
|
||||
if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
|
||||
} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "amdgpu/polaris11_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris11_smc.bin");
|
||||
}
|
||||
} else if (type == CGS_UCODE_ID_SMU_SK) {
|
||||
strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_POLARIS10:
|
||||
if (type == CGS_UCODE_ID_SMU) {
|
||||
if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
|
||||
} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "amdgpu/polaris10_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris10_smc.bin");
|
||||
}
|
||||
} else if (type == CGS_UCODE_ID_SMU_SK) {
|
||||
strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_POLARIS12:
|
||||
if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
|
||||
info->is_kicker = true;
|
||||
strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
|
||||
} else {
|
||||
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/polaris12_smc.bin");
|
||||
}
|
||||
break;
|
||||
case CHIP_VEGAM:
|
||||
strcpy(fw_name, "amdgpu/vegam_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/vegam_smc.bin");
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
if ((adev->pdev->device == 0x687f) &&
|
||||
((adev->pdev->revision == 0xc0) ||
|
||||
(adev->pdev->revision == 0xc1) ||
|
||||
(adev->pdev->revision == 0xc3)))
|
||||
strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
|
||||
else
|
||||
strcpy(fw_name, "amdgpu/vega10_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/vega10_smc.bin");
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
strcpy(fw_name, "amdgpu/vega12_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/vega12_smc.bin");
|
||||
break;
|
||||
case CHIP_VEGA20:
|
||||
strcpy(fw_name, "amdgpu/vega20_smc.bin");
|
||||
strscpy(fw_name, "amdgpu/vega20_smc.bin");
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("SMC firmware not supported\n");
|
||||
|
||||
@@ -549,7 +549,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (!amdgpu_aca_is_enabled(adev))
|
||||
if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
|
||||
return 0;
|
||||
|
||||
r = amdgpu_cper_ring_init(adev);
|
||||
@@ -568,7 +568,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev)
|
||||
|
||||
int amdgpu_cper_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!amdgpu_aca_is_enabled(adev))
|
||||
if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
|
||||
return 0;
|
||||
|
||||
adev->cper.enabled = false;
|
||||
|
||||
@@ -296,7 +296,25 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
|
||||
num_ibs[i], &p->jobs[i]);
|
||||
if (ret)
|
||||
goto free_all_kdata;
|
||||
p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id];
|
||||
switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
|
||||
case AMDGPU_ENFORCE_ISOLATION_DISABLE:
|
||||
default:
|
||||
p->jobs[i]->enforce_isolation = false;
|
||||
p->jobs[i]->run_cleaner_shader = false;
|
||||
break;
|
||||
case AMDGPU_ENFORCE_ISOLATION_ENABLE:
|
||||
p->jobs[i]->enforce_isolation = true;
|
||||
p->jobs[i]->run_cleaner_shader = true;
|
||||
break;
|
||||
case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
|
||||
p->jobs[i]->enforce_isolation = true;
|
||||
p->jobs[i]->run_cleaner_shader = false;
|
||||
break;
|
||||
case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
|
||||
p->jobs[i]->enforce_isolation = true;
|
||||
p->jobs[i]->run_cleaner_shader = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
p->gang_leader = p->jobs[p->gang_leader_idx];
|
||||
|
||||
@@ -349,6 +367,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
|
||||
ring = amdgpu_job_ring(job);
|
||||
ib = &job->ibs[job->num_ibs++];
|
||||
|
||||
/* submissions to kernel queues are disabled */
|
||||
if (ring->no_user_submission)
|
||||
return -EINVAL;
|
||||
|
||||
/* MM engine doesn't support user fences */
|
||||
if (p->uf_bo && ring->funcs->no_user_fence)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -85,6 +85,7 @@
|
||||
|
||||
#if IS_ENABLED(CONFIG_X86)
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#endif
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
|
||||
@@ -1680,6 +1681,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return 0;
|
||||
|
||||
if (!amdgpu_rebar)
|
||||
return 0;
|
||||
|
||||
/* resizing on Dell G5 SE platforms causes problems with runtime pm */
|
||||
if ((amdgpu_runtime_pm != 0) &&
|
||||
adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
|
||||
@@ -1870,6 +1874,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_X86)
|
||||
struct cpuinfo_x86 *c = &cpu_data(0);
|
||||
|
||||
if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
|
||||
return false;
|
||||
|
||||
if (c->x86 == 6 &&
|
||||
adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
|
||||
switch (c->x86_model) {
|
||||
case VFM_MODEL(INTEL_ALDERLAKE):
|
||||
case VFM_MODEL(INTEL_ALDERLAKE_L):
|
||||
case VFM_MODEL(INTEL_RAPTORLAKE):
|
||||
case VFM_MODEL(INTEL_RAPTORLAKE_P):
|
||||
case VFM_MODEL(INTEL_RAPTORLAKE_S):
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_should_use_aspm - check if the device should program ASPM
|
||||
*
|
||||
@@ -1894,7 +1927,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
|
||||
}
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return false;
|
||||
if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
|
||||
if (amdgpu_device_aspm_support_quirk(adev))
|
||||
return false;
|
||||
return pcie_aspm_enabled(adev->pdev);
|
||||
}
|
||||
@@ -2112,8 +2145,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
||||
|
||||
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
|
||||
|
||||
for (i = 0; i < MAX_XCP; i++)
|
||||
adev->enforce_isolation[i] = !!enforce_isolation;
|
||||
for (i = 0; i < MAX_XCP; i++) {
|
||||
switch (amdgpu_enforce_isolation) {
|
||||
case -1:
|
||||
case 0:
|
||||
default:
|
||||
/* disable */
|
||||
adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
|
||||
break;
|
||||
case 1:
|
||||
/* enable */
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE;
|
||||
break;
|
||||
case 2:
|
||||
/* enable legacy mode */
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
|
||||
break;
|
||||
case 3:
|
||||
/* enable only process isolation without submitting cleaner shader */
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2689,6 +2745,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for IP version 9.4.3 with A0 hardware */
|
||||
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
|
||||
!amdgpu_device_get_rev_id(adev)) {
|
||||
dev_err(adev->dev, "Unsupported A0 hardware\n");
|
||||
return -ENODEV; /* device unsupported - no device error */
|
||||
}
|
||||
|
||||
if (amdgpu_has_atpx() &&
|
||||
(amdgpu_is_atpx_hybrid() ||
|
||||
amdgpu_has_atpx_dgpu_power_cntl()) &&
|
||||
@@ -2701,7 +2764,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
||||
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
|
||||
}
|
||||
|
||||
|
||||
adev->pm.pp_feature = amdgpu_pp_feature_mask;
|
||||
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
|
||||
@@ -3172,6 +3234,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
|
||||
* always assumed to be lost.
|
||||
*/
|
||||
switch (amdgpu_asic_reset_method(adev)) {
|
||||
case AMD_RESET_METHOD_LINK:
|
||||
case AMD_RESET_METHOD_BACO:
|
||||
case AMD_RESET_METHOD_MODE1:
|
||||
return true;
|
||||
@@ -3455,6 +3518,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
|
||||
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
|
||||
|
||||
amdgpu_amdkfd_suspend(adev, false);
|
||||
amdgpu_userq_suspend(adev);
|
||||
|
||||
/* Workaround for ASICs need to disable SMC first */
|
||||
amdgpu_device_smu_fini_early(adev);
|
||||
@@ -4307,9 +4371,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
amdgpu_sync_create(&adev->isolation[i].active);
|
||||
amdgpu_sync_create(&adev->isolation[i].prev);
|
||||
}
|
||||
mutex_init(&adev->gfx.kfd_sch_mutex);
|
||||
mutex_init(&adev->gfx.userq_sch_mutex);
|
||||
mutex_init(&adev->gfx.workload_profile_mutex);
|
||||
mutex_init(&adev->vcn.workload_profile_mutex);
|
||||
mutex_init(&adev->userq_mutex);
|
||||
|
||||
amdgpu_device_init_apu_flags(adev);
|
||||
|
||||
@@ -4329,12 +4394,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
spin_lock_init(&adev->virt.rlcg_reg_lock);
|
||||
spin_lock_init(&adev->wb.lock);
|
||||
|
||||
xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
|
||||
|
||||
INIT_LIST_HEAD(&adev->reset_list);
|
||||
|
||||
INIT_LIST_HEAD(&adev->ras_list);
|
||||
|
||||
INIT_LIST_HEAD(&adev->pm.od_kobj_list);
|
||||
|
||||
INIT_LIST_HEAD(&adev->userq_mgr_list);
|
||||
|
||||
INIT_DELAYED_WORK(&adev->delayed_init_work,
|
||||
amdgpu_device_delayed_init_work_handler);
|
||||
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
|
||||
@@ -4907,28 +4976,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
* @data: data
*
* This function is called when the system is about to suspend or hibernate.
* It is used to evict resources from the device before the system goes to
* sleep while there is still access to swap.
* It is used to set the appropriate flags so that eviction can be optimized
* in the pm prepare callback.
*/
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
void *data)
{
struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
int r;

switch (mode) {
case PM_HIBERNATION_PREPARE:
adev->in_s4 = true;
fallthrough;
case PM_SUSPEND_PREPARE:
r = amdgpu_device_evict_resources(adev);
/*
* This is considered non-fatal at this time because
* amdgpu_device_prepare() will also fatally evict resources.
* See https://gitlab.freedesktop.org/drm/amd/-/issues/3781
*/
if (r)
drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r);
break;
case PM_POST_HIBERNATION:
adev->in_s4 = false;
break;
}
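For readers unfamiliar with the PM notifier machinery, here is a minimal sketch of how a callback like amdgpu_device_pm_notifier() is hooked up; the example_* helpers are illustrative only, amdgpu registers its pm_nb in its own init/fini paths.

#include <linux/suspend.h>

static int example_register_pm_notifier(struct amdgpu_device *adev)
{
	/* pm_nb is the notifier_block embedded in struct amdgpu_device */
	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
	adev->pm_nb.priority = 0;
	return register_pm_notifier(&adev->pm_nb);
}

static void example_unregister_pm_notifier(struct amdgpu_device *adev)
{
	unregister_pm_notifier(&adev->pm_nb);
}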
@@ -4949,15 +5010,13 @@ int amdgpu_device_prepare(struct drm_device *dev)
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int i, r;
|
||||
|
||||
amdgpu_choose_low_power_state(adev);
|
||||
|
||||
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
|
||||
return 0;
|
||||
|
||||
/* Evict the majority of BOs before starting suspend sequence */
|
||||
r = amdgpu_device_evict_resources(adev);
|
||||
if (r)
|
||||
goto unprepare;
|
||||
return r;
|
||||
|
||||
flush_delayed_work(&adev->gfx.gfx_off_delay_work);
|
||||
|
||||
@@ -4968,15 +5027,10 @@ int amdgpu_device_prepare(struct drm_device *dev)
|
||||
continue;
|
||||
r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
|
||||
if (r)
|
||||
goto unprepare;
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unprepare:
|
||||
adev->in_s0ix = adev->in_s3 = adev->in_s4 = false;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -5018,8 +5072,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
|
||||
|
||||
amdgpu_device_ip_suspend_phase1(adev);
|
||||
|
||||
if (!adev->in_s0ix)
|
||||
if (!adev->in_s0ix) {
|
||||
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
|
||||
amdgpu_userq_suspend(adev);
|
||||
}
|
||||
|
||||
r = amdgpu_device_evict_resources(adev);
|
||||
if (r)
|
||||
@@ -5086,6 +5142,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
|
||||
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
|
||||
if (r)
|
||||
goto exit;
|
||||
|
||||
r = amdgpu_userq_resume(adev);
|
||||
if (r)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
r = amdgpu_device_ip_late_init(adev);
|
||||
@@ -5134,9 +5194,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
|
||||
}
|
||||
adev->in_suspend = false;
|
||||
|
||||
if (adev->enable_mes)
|
||||
amdgpu_mes_self_test(adev);
|
||||
|
||||
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
|
||||
DRM_WARN("smart shift update failed\n");
|
||||
|
||||
@@ -5517,6 +5574,29 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
return ret;
}

int amdgpu_device_link_reset(struct amdgpu_device *adev)
{
int ret = 0;

dev_info(adev->dev, "GPU link reset\n");

if (!adev->pcie_reset_ctx.occurs_dpc)
ret = amdgpu_dpm_link_reset(adev);

if (ret)
goto link_reset_failed;

ret = amdgpu_psp_wait_for_bootloader(adev);
if (ret)
goto link_reset_failed;

return 0;

link_reset_failed:
dev_err(adev->dev, "GPU link reset failed\n");
return ret;
}
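As a hedged sketch (not the driver's actual dispatcher), a caller could route to the new link reset the same way mode1 is selected, keyed off amdgpu_asic_reset_method():

static int example_do_asic_reset(struct amdgpu_device *adev)
{
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_LINK:
		/* PCIe link reset added above */
		return amdgpu_device_link_reset(adev);
	case AMD_RESET_METHOD_MODE1:
		return amdgpu_device_mode1_reset(adev);
	default:
		/* BACO, mode2, etc. are handled by other helpers */
		return -EINVAL;
	}
}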
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_reset_context *reset_context)
|
||||
{
|
||||
@@ -5821,6 +5901,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
|
||||
|
||||
switch (amdgpu_asic_reset_method(adev)) {
|
||||
case AMD_RESET_METHOD_MODE1:
|
||||
case AMD_RESET_METHOD_LINK:
|
||||
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
|
||||
break;
|
||||
case AMD_RESET_METHOD_MODE2:
|
||||
@@ -5937,94 +6018,42 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @job: which job trigger hang
|
||||
* @reset_context: amdgpu reset context pointer
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
static int amdgpu_device_halt_activities(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_reset_context *reset_context)
|
||||
struct amdgpu_reset_context *reset_context,
|
||||
struct list_head *device_list,
|
||||
struct amdgpu_hive_info *hive,
|
||||
bool need_emergency_restart)
|
||||
{
|
||||
struct list_head device_list, *device_list_handle = NULL;
|
||||
bool job_signaled = false;
|
||||
struct amdgpu_hive_info *hive = NULL;
|
||||
struct list_head *device_list_handle = NULL;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
int i, r = 0;
|
||||
bool need_emergency_restart = false;
|
||||
bool audio_suspended = false;
|
||||
int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
|
||||
|
||||
/*
|
||||
* If it reaches here because of hang/timeout and a RAS error is
|
||||
* detected at the same time, let RAS recovery take care of it.
|
||||
*/
|
||||
if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
|
||||
!amdgpu_sriov_vf(adev) &&
|
||||
reset_context->src != AMDGPU_RESET_SRC_RAS) {
|
||||
dev_dbg(adev->dev,
|
||||
"Gpu recovery from source: %d yielding to RAS error recovery handling",
|
||||
reset_context->src);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Special case: RAS triggered and full reset isn't supported
|
||||
*/
|
||||
need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
|
||||
|
||||
/*
|
||||
* Flush RAM to disk so that after reboot
|
||||
* the user can read log and see why the system rebooted.
|
||||
*/
|
||||
if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
|
||||
amdgpu_ras_get_context(adev)->reboot) {
|
||||
DRM_WARN("Emergency reboot.");
|
||||
|
||||
ksys_sync_helper();
|
||||
emergency_restart();
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "GPU %s begin!\n",
|
||||
need_emergency_restart ? "jobs stop":"reset");
|
||||
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive)
|
||||
mutex_lock(&hive->hive_lock);
|
||||
|
||||
reset_context->job = job;
|
||||
reset_context->hive = hive;
|
||||
/*
|
||||
* Build list of devices to reset.
|
||||
* In case we are in XGMI hive mode, resort the device list
|
||||
* to put adev in the 1st position.
|
||||
*/
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
list_add_tail(&tmp_adev->reset_list, &device_list);
|
||||
list_add_tail(&tmp_adev->reset_list, device_list);
|
||||
if (adev->shutdown)
|
||||
tmp_adev->shutdown = true;
|
||||
if (adev->pcie_reset_ctx.occurs_dpc)
|
||||
tmp_adev->pcie_reset_ctx.in_link_reset = true;
|
||||
}
|
||||
if (!list_is_first(&adev->reset_list, &device_list))
|
||||
list_rotate_to_front(&adev->reset_list, &device_list);
|
||||
device_list_handle = &device_list;
|
||||
if (!list_is_first(&adev->reset_list, device_list))
|
||||
list_rotate_to_front(&adev->reset_list, device_list);
|
||||
device_list_handle = device_list;
|
||||
} else {
|
||||
list_add_tail(&adev->reset_list, &device_list);
|
||||
device_list_handle = &device_list;
|
||||
list_add_tail(&adev->reset_list, device_list);
|
||||
device_list_handle = device_list;
|
||||
}
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) {
|
||||
r = amdgpu_device_health_check(device_list_handle);
|
||||
if (r)
|
||||
goto end_reset;
|
||||
return r;
|
||||
}
|
||||
|
||||
/* We need to lock reset domain only once both for XGMI and single device */
|
||||
@@ -6048,7 +6077,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
* some audio codec errors.
|
||||
*/
|
||||
if (!amdgpu_device_suspend_display_audio(tmp_adev))
|
||||
audio_suspended = true;
|
||||
tmp_adev->pcie_reset_ctx.audio_suspended = true;
|
||||
|
||||
amdgpu_ras_set_error_query_ready(tmp_adev, false);
|
||||
|
||||
@@ -6066,6 +6095,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
|
||||
/* disable ras on ALL IPs */
|
||||
if (!need_emergency_restart &&
|
||||
(!adev->pcie_reset_ctx.occurs_dpc) &&
|
||||
amdgpu_device_ip_need_full_reset(tmp_adev))
|
||||
amdgpu_ras_suspend(tmp_adev);
|
||||
|
||||
@@ -6083,24 +6113,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
atomic_inc(&tmp_adev->gpu_reset_counter);
|
||||
}
|
||||
|
||||
if (need_emergency_restart)
|
||||
goto skip_sched_resume;
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must check guilty signal here since after this point all old
|
||||
* HW fences are force signaled.
|
||||
*
|
||||
* job->base holds a reference to parent fence
|
||||
*/
|
||||
if (job && dma_fence_is_signaled(&job->hw_fence)) {
|
||||
job_signaled = true;
|
||||
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
|
||||
goto skip_hw_reset;
|
||||
}
|
||||
static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
|
||||
struct list_head *device_list,
|
||||
struct amdgpu_reset_context *reset_context)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
|
||||
int r = 0;
|
||||
|
||||
retry: /* Rest of adevs pre asic reset from XGMI hive. */
|
||||
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
if (adev->pcie_reset_ctx.occurs_dpc)
|
||||
tmp_adev->no_hw_access = true;
|
||||
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
|
||||
if (adev->pcie_reset_ctx.occurs_dpc)
|
||||
tmp_adev->no_hw_access = false;
|
||||
/*TODO Should we stop ?*/
|
||||
if (r) {
|
||||
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
|
||||
@@ -6112,6 +6142,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
/* Actual ASIC resets if needed.*/
|
||||
/* Host driver will handle XGMI hive reset for SRIOV */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
|
||||
/* Bail out of reset early */
|
||||
if (amdgpu_ras_is_rma(adev))
|
||||
return -ENODEV;
|
||||
|
||||
if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
|
||||
dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
|
||||
amdgpu_ras_set_fed(adev, true);
|
||||
@@ -6126,12 +6161,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
if (r)
|
||||
adev->asic_reset_res = r;
|
||||
} else {
|
||||
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
|
||||
r = amdgpu_do_asic_reset(device_list, reset_context);
|
||||
if (r && r == -EAGAIN)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
/*
|
||||
* Drop any pending non scheduler resets queued before reset is done.
|
||||
* Any reset scheduled after this point would be valid. Scheduler resets
|
||||
@@ -6141,10 +6176,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
amdgpu_device_stop_pending_resets(tmp_adev);
|
||||
}
|
||||
|
||||
skip_hw_reset:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_device_sched_resume(struct list_head *device_list,
|
||||
struct amdgpu_reset_context *reset_context,
|
||||
bool job_signaled)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
int i, r = 0;
|
||||
|
||||
/* Post ASIC reset for all devs .*/
|
||||
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = tmp_adev->rings[i];
|
||||
@@ -6180,8 +6223,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
}
|
||||
}
|
||||
|
||||
skip_sched_resume:
|
||||
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
|
||||
struct list_head *device_list,
|
||||
bool need_emergency_restart)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
/* unlock kfd: SRIOV would do it separately */
|
||||
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
|
||||
amdgpu_amdkfd_post_reset(tmp_adev);
|
||||
@@ -6192,18 +6243,114 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
if (!adev->kfd.init_complete)
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
|
||||
if (audio_suspended)
|
||||
if (tmp_adev->pcie_reset_ctx.audio_suspended)
|
||||
amdgpu_device_resume_display_audio(tmp_adev);
|
||||
|
||||
amdgpu_device_unset_mp1_state(tmp_adev);
|
||||
|
||||
amdgpu_ras_set_error_query_ready(tmp_adev, true);
|
||||
|
||||
}
|
||||
|
||||
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
|
||||
tmp_adev = list_first_entry(device_list, struct amdgpu_device,
|
||||
reset_list);
|
||||
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @job: which job trigger hang
|
||||
* @reset_context: amdgpu reset context pointer
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_reset_context *reset_context)
|
||||
{
|
||||
struct list_head device_list;
|
||||
bool job_signaled = false;
|
||||
struct amdgpu_hive_info *hive = NULL;
|
||||
int r = 0;
|
||||
bool need_emergency_restart = false;
|
||||
|
||||
/*
|
||||
* If it reaches here because of hang/timeout and a RAS error is
|
||||
* detected at the same time, let RAS recovery take care of it.
|
||||
*/
|
||||
if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
|
||||
!amdgpu_sriov_vf(adev) &&
|
||||
reset_context->src != AMDGPU_RESET_SRC_RAS) {
|
||||
dev_dbg(adev->dev,
|
||||
"Gpu recovery from source: %d yielding to RAS error recovery handling",
|
||||
reset_context->src);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Special case: RAS triggered and full reset isn't supported
|
||||
*/
|
||||
need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
|
||||
|
||||
/*
|
||||
* Flush RAM to disk so that after reboot
|
||||
* the user can read log and see why the system rebooted.
|
||||
*/
|
||||
if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
|
||||
amdgpu_ras_get_context(adev)->reboot) {
|
||||
DRM_WARN("Emergency reboot.");
|
||||
|
||||
ksys_sync_helper();
|
||||
emergency_restart();
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "GPU %s begin!\n",
|
||||
need_emergency_restart ? "jobs stop":"reset");
|
||||
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive)
|
||||
mutex_lock(&hive->hive_lock);
|
||||
|
||||
reset_context->job = job;
|
||||
reset_context->hive = hive;
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
|
||||
hive, need_emergency_restart);
|
||||
if (r)
|
||||
goto end_reset;
|
||||
|
||||
if (need_emergency_restart)
|
||||
goto skip_sched_resume;
|
||||
/*
|
||||
* Must check guilty signal here since after this point all old
|
||||
* HW fences are force signaled.
|
||||
*
|
||||
* job->base holds a reference to parent fence
|
||||
*/
|
||||
if (job && dma_fence_is_signaled(&job->hw_fence)) {
|
||||
job_signaled = true;
|
||||
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
|
||||
goto skip_hw_reset;
|
||||
}
|
||||
|
||||
r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
|
||||
if (r)
|
||||
goto end_reset;
|
||||
skip_hw_reset:
|
||||
r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
|
||||
if (r)
|
||||
goto end_reset;
|
||||
skip_sched_resume:
|
||||
amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
|
||||
end_reset:
|
||||
if (hive) {
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
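To summarize the split above, GPU recovery is now composed of four helpers that amdgpu_device_gpu_recover() (and the PCI error path below) call in order; roughly:

/*
 * amdgpu_device_halt_activities()  - build the (XGMI hive aware) device list,
 *                                    run the health check, suspend display
 *                                    audio and RAS, quiesce each device
 * amdgpu_device_asic_reset()       - amdgpu_device_pre_asic_reset() per device,
 *                                    then amdgpu_do_asic_reset(), retrying on
 *                                    -EAGAIN
 * amdgpu_device_sched_resume()     - post-reset: bring rings and schedulers
 *                                    back up
 * amdgpu_device_gpu_resume()       - unlock KFD, resume display audio, unset
 *                                    MP1 state, unlock the reset domain
 */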
@@ -6587,12 +6734,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
|
||||
{
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int i;
|
||||
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
|
||||
struct amdgpu_reset_context reset_context;
|
||||
struct list_head device_list;
|
||||
int r = 0;
|
||||
|
||||
DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
|
||||
dev_info(adev->dev, "PCI error: detected callback!!\n");
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
DRM_WARN("No support for XGMI hive yet...");
|
||||
if (!amdgpu_dpm_is_link_reset_supported(adev)) {
|
||||
dev_warn(adev->dev, "No support for XGMI hive yet...\n");
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
@@ -6600,32 +6750,30 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
|
||||
|
||||
switch (state) {
|
||||
case pci_channel_io_normal:
|
||||
dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
|
||||
return PCI_ERS_RESULT_CAN_RECOVER;
|
||||
/* Fatal error, prepare for slot reset */
|
||||
case pci_channel_io_frozen:
|
||||
/*
|
||||
* Locking adev->reset_domain->sem will prevent any external access
|
||||
* to GPU during PCI error recovery
|
||||
*/
|
||||
amdgpu_device_lock_reset_domain(adev->reset_domain);
|
||||
amdgpu_device_set_mp1_state(adev);
|
||||
/* Fatal error, prepare for slot reset */
|
||||
dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
|
||||
|
||||
/*
|
||||
* Block any work scheduling as we do for regular GPU reset
|
||||
* for the duration of the recovery
|
||||
*/
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
if (hive)
|
||||
mutex_lock(&hive->hive_lock);
|
||||
adev->pcie_reset_ctx.occurs_dpc = true;
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
if (!amdgpu_ring_sched_ready(ring))
|
||||
continue;
|
||||
|
||||
drm_sched_stop(&ring->sched, NULL);
|
||||
r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
|
||||
hive, false);
|
||||
if (hive) {
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
amdgpu_put_xgmi_hive(hive);
|
||||
}
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
if (r)
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
return PCI_ERS_RESULT_NEED_RESET;
|
||||
case pci_channel_io_perm_failure:
|
||||
/* Permanent error, prepare for device removal */
|
||||
dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
|
||||
return PCI_ERS_RESULT_DISCONNECT;
|
||||
}
|
||||
|
||||
@@ -6638,8 +6786,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
|
||||
*/
|
||||
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
|
||||
{
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
|
||||
DRM_INFO("PCI error: mmio enabled callback!!\n");
|
||||
dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
|
||||
|
||||
/* TODO - dump whatever for debugging purposes */
|
||||
|
||||
@@ -6663,10 +6813,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
|
||||
{
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int r, i;
|
||||
struct amdgpu_reset_context reset_context;
|
||||
u32 memsize;
|
||||
struct amdgpu_device *tmp_adev;
|
||||
struct amdgpu_hive_info *hive;
|
||||
struct list_head device_list;
|
||||
int r = 0, i;
|
||||
u32 memsize;
|
||||
|
||||
/* PCI error slot reset should be skipped During RAS recovery */
|
||||
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
|
||||
@@ -6674,15 +6826,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
|
||||
amdgpu_ras_in_recovery(adev))
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
|
||||
DRM_INFO("PCI error: slot reset callback!!\n");
|
||||
dev_info(adev->dev, "PCI error: slot reset callback!!\n");
|
||||
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
list_add_tail(&adev->reset_list, &device_list);
|
||||
|
||||
/* wait for asic to come out of reset */
|
||||
msleep(500);
|
||||
msleep(700);
|
||||
|
||||
/* Restore PCI confspace */
|
||||
amdgpu_device_load_pci_state(pdev);
|
||||
@@ -6703,26 +6852,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
|
||||
reset_context.method = AMD_RESET_METHOD_NONE;
|
||||
reset_context.reset_req_dev = adev;
|
||||
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
|
||||
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
|
||||
set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
adev->no_hw_access = true;
|
||||
r = amdgpu_device_pre_asic_reset(adev, &reset_context);
|
||||
adev->no_hw_access = false;
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_do_asic_reset(&device_list, &reset_context);
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive) {
|
||||
mutex_lock(&hive->hive_lock);
|
||||
reset_context.hive = hive;
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
tmp_adev->pcie_reset_ctx.in_link_reset = true;
|
||||
list_add_tail(&tmp_adev->reset_list, &device_list);
|
||||
}
|
||||
} else {
|
||||
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
|
||||
list_add_tail(&adev->reset_list, &device_list);
|
||||
}
|
||||
|
||||
r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
|
||||
out:
|
||||
if (!r) {
|
||||
if (amdgpu_device_cache_pci_state(adev->pdev))
|
||||
pci_restore_state(adev->pdev);
|
||||
|
||||
DRM_INFO("PCIe error recovery succeeded\n");
|
||||
dev_info(adev->dev, "PCIe error recovery succeeded\n");
|
||||
} else {
|
||||
DRM_ERROR("PCIe error recovery failed, err:%d", r);
|
||||
amdgpu_device_unset_mp1_state(adev);
|
||||
amdgpu_device_unlock_reset_domain(adev->reset_domain);
|
||||
dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
|
||||
if (hive) {
|
||||
list_for_each_entry(tmp_adev, &device_list, reset_list)
|
||||
amdgpu_device_unset_mp1_state(tmp_adev);
|
||||
amdgpu_device_unlock_reset_domain(adev->reset_domain);
|
||||
}
|
||||
}
|
||||
|
||||
if (hive) {
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
amdgpu_put_xgmi_hive(hive);
|
||||
}
|
||||
|
||||
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
|
||||
@@ -6739,26 +6902,36 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
|
||||
{
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int i;
|
||||
struct list_head device_list;
|
||||
struct amdgpu_hive_info *hive = NULL;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
|
||||
DRM_INFO("PCI error: resume callback!!\n");
|
||||
dev_info(adev->dev, "PCI error: resume callback!!\n");
|
||||
|
||||
/* Only continue execution for the case of pci_channel_io_frozen */
|
||||
if (adev->pci_channel_state != pci_channel_io_frozen)
|
||||
return;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
if (!amdgpu_ring_sched_ready(ring))
|
||||
continue;
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive) {
|
||||
mutex_lock(&hive->hive_lock);
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
tmp_adev->pcie_reset_ctx.in_link_reset = false;
|
||||
list_add_tail(&tmp_adev->reset_list, &device_list);
|
||||
}
|
||||
} else
|
||||
list_add_tail(&adev->reset_list, &device_list);
|
||||
|
||||
drm_sched_start(&ring->sched, 0);
|
||||
amdgpu_device_sched_resume(&device_list, NULL, NULL);
|
||||
amdgpu_device_gpu_resume(adev, &device_list, false);
|
||||
adev->pcie_reset_ctx.occurs_dpc = false;
|
||||
|
||||
if (hive) {
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
amdgpu_put_xgmi_hive(hive);
|
||||
}
|
||||
|
||||
amdgpu_device_unset_mp1_state(adev);
|
||||
amdgpu_device_unlock_reset_domain(adev->reset_domain);
|
||||
}
|
||||
|
||||
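These four callbacks slot into the standard PCI error recovery framework; amdgpu already wires them up in its pci_driver, so the table below is purely illustrative of the shape:

static const struct pci_error_handlers example_amdgpu_pci_err_handlers = {
	.error_detected	= amdgpu_pci_error_detected,	/* halt activities, pick recover vs. reset */
	.mmio_enabled	= amdgpu_pci_mmio_enabled,	/* MMIO is back, sanity checks */
	.slot_reset	= amdgpu_pci_slot_reset,	/* restore config space, re-init the ASIC */
	.resume		= amdgpu_pci_resume,		/* restart schedulers and unlock */
};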
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
|
||||
|
||||
@@ -51,6 +51,8 @@
|
||||
#include "amdgpu_reset.h"
|
||||
#include "amdgpu_sched.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_userq.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
#include "../amdxcp/amdgpu_xcp_drv.h"
|
||||
|
||||
/*
|
||||
@@ -123,9 +125,10 @@
|
||||
* - 3.61.0 - Contains fix for RV/PCO compute queues
|
||||
* - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
|
||||
* - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
|
||||
* - 3.64.0 - Userq IP support query
|
||||
*/
|
||||
#define KMS_DRIVER_MAJOR 3
|
||||
#define KMS_DRIVER_MINOR 63
|
||||
#define KMS_DRIVER_MINOR 64
|
||||
#define KMS_DRIVER_PATCHLEVEL 0
|
||||
|
||||
/*
|
||||
@@ -140,6 +143,7 @@ enum AMDGPU_DEBUG_MASK {
|
||||
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
|
||||
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
|
||||
AMDGPU_DEBUG_SMU_POOL = BIT(7),
|
||||
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
|
||||
};
|
||||
|
||||
unsigned int amdgpu_vram_limit = UINT_MAX;
|
||||
@@ -176,7 +180,7 @@ uint amdgpu_pg_mask = 0xffffffff;
|
||||
uint amdgpu_sdma_phase_quantum = 32;
|
||||
char *amdgpu_disable_cu;
|
||||
char *amdgpu_virtual_display;
|
||||
bool enforce_isolation;
|
||||
int amdgpu_enforce_isolation = -1;
|
||||
int amdgpu_modeset = -1;
|
||||
|
||||
/* Specifies the default granularity for SVM, used in buffer
|
||||
@@ -238,6 +242,8 @@ int amdgpu_agp = -1; /* auto */
|
||||
int amdgpu_wbrf = -1;
|
||||
int amdgpu_damage_clips = -1; /* auto */
|
||||
int amdgpu_umsch_mm_fwlog;
|
||||
int amdgpu_rebar = -1; /* auto */
|
||||
int amdgpu_user_queue = -1;
|
||||
|
||||
DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
|
||||
"DRM_UT_CORE",
|
||||
@@ -1033,11 +1039,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
|
||||
|
||||
|
||||
/**
|
||||
* DOC: enforce_isolation (bool)
|
||||
* enforce process isolation between graphics and compute via using the same reserved vmid.
|
||||
* DOC: enforce_isolation (int)
|
||||
* enforce process isolation between graphics and compute.
|
||||
* (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
|
||||
*/
|
||||
module_param(enforce_isolation, bool, 0444);
|
||||
MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
|
||||
module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
|
||||
MODULE_PARM_DESC(enforce_isolation,
|
||||
"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
|
||||
|
||||
/**
|
||||
* DOC: modeset (int)
|
||||
@@ -1096,6 +1104,28 @@ MODULE_PARM_DESC(wbrf,
|
||||
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
|
||||
module_param_named(wbrf, amdgpu_wbrf, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: rebar (int)
|
||||
* Allow BAR resizing. Disable this to prevent the driver from attempting
|
||||
* to resize the BAR if the GPU supports it and there is available MMIO space.
|
||||
* Note that this just prevents the driver from resizing the BAR. The BIOS
|
||||
* may have already resized the BAR at boot time.
|
||||
*/
|
||||
MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
|
||||
module_param_named(rebar, amdgpu_rebar, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: user_queue (int)
|
||||
* Enable user queues on systems that support user queues. Possible values:
|
||||
*
|
||||
* - -1 = auto (ASIC specific default)
|
||||
* - 0 = user queues disabled
|
||||
* - 1 = user queues enabled and kernel queues enabled (if supported)
|
||||
* - 2 = user queues enabled and kernel queues disabled
|
||||
*/
|
||||
MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
|
||||
module_param_named(user_queue, amdgpu_user_queue, int, 0444);
|
||||
|
||||
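As an illustration of how the new parameter might be consumed (the helper below is hypothetical, not part of this patch), value 2 is the only setting that turns kernel queues off:

static bool example_kernel_queues_allowed(void)
{
	/* -1 = auto, 0 = UQs off, 1 = UQs + KQs, 2 = UQs only (KQs disabled) */
	return amdgpu_user_queue != 2;
}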
/* These devices are not supported by amdgpu.
|
||||
* They are supported by the mach64, r128, radeon drivers
|
||||
*/
|
||||
@@ -2244,6 +2274,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
|
||||
pr_info("debug: use vram for smu pool\n");
|
||||
adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
|
||||
}
|
||||
if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
|
||||
pr_info("debug: VM mode debug for userptr is enabled\n");
|
||||
adev->debug_vm_userptr = true;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
|
||||
@@ -2615,13 +2649,8 @@ static int amdgpu_pmops_freeze(struct device *dev)
|
||||
static int amdgpu_pmops_thaw(struct device *dev)
|
||||
{
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(drm_dev);
|
||||
int r;
|
||||
|
||||
r = amdgpu_device_resume(drm_dev, true);
|
||||
adev->in_s4 = false;
|
||||
|
||||
return r;
|
||||
return amdgpu_device_resume(drm_dev, true);
|
||||
}
|
||||
|
||||
static int amdgpu_pmops_poweroff(struct device *dev)
|
||||
@@ -2634,9 +2663,6 @@ static int amdgpu_pmops_poweroff(struct device *dev)
|
||||
static int amdgpu_pmops_restore(struct device *dev)
|
||||
{
|
||||
struct drm_device *drm_dev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(drm_dev);
|
||||
|
||||
adev->in_s4 = false;
|
||||
|
||||
return amdgpu_device_resume(drm_dev, true);
|
||||
}
|
||||
@@ -2708,6 +2734,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_runtime_idle_check_userq(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
struct amdgpu_usermode_queue *queue;
struct amdgpu_userq_mgr *uqm, *tmp;
int queue_id;
int ret = 0;

mutex_lock(&adev->userq_mutex);
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
ret = -EBUSY;
goto done;
}
}
done:
mutex_unlock(&adev->userq_mutex);

return ret;
}

static int amdgpu_pmops_runtime_suspend(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
@@ -2721,6 +2770,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
|
||||
}
|
||||
|
||||
ret = amdgpu_runtime_idle_check_display(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = amdgpu_runtime_idle_check_userq(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -2844,12 +2896,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
|
||||
}
|
||||
|
||||
ret = amdgpu_runtime_idle_check_display(dev);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
ret = amdgpu_runtime_idle_check_userq(dev);
|
||||
done:
|
||||
pm_runtime_mark_last_busy(dev);
|
||||
pm_runtime_autosuspend(dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_drm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct drm_file *file_priv = filp->private_data;
|
||||
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
|
||||
|
||||
if (fpriv) {
|
||||
fpriv->evf_mgr.fd_closing = true;
|
||||
amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
|
||||
amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
|
||||
}
|
||||
|
||||
return drm_release(inode, filp);
|
||||
}
|
||||
|
||||
long amdgpu_drm_ioctl(struct file *filp,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
@@ -2901,7 +2971,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = drm_open,
|
||||
.flush = amdgpu_flush,
|
||||
.release = drm_release,
|
||||
.release = amdgpu_drm_release,
|
||||
.unlocked_ioctl = amdgpu_drm_ioctl,
|
||||
.mmap = drm_gem_mmap,
|
||||
.poll = drm_poll,
|
||||
@@ -2948,6 +3018,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
|
||||
};
|
||||
|
||||
static const struct drm_driver amdgpu_kms_driver = {
|
||||
@@ -3038,6 +3111,10 @@ static int __init amdgpu_init(void)
|
||||
if (r)
|
||||
goto error_fence;
|
||||
|
||||
r = amdgpu_userq_fence_slab_init();
|
||||
if (r)
|
||||
goto error_fence;
|
||||
|
||||
DRM_INFO("amdgpu kernel modesetting enabled.\n");
|
||||
amdgpu_register_atpx_handler();
|
||||
amdgpu_acpi_detect();
|
||||
@@ -3069,6 +3146,7 @@ static void __exit amdgpu_exit(void)
|
||||
amdgpu_acpi_release();
|
||||
amdgpu_sync_fini();
|
||||
amdgpu_fence_slab_fini();
|
||||
amdgpu_userq_fence_slab_fini();
|
||||
mmu_notifier_synchronize();
|
||||
amdgpu_xcp_drv_release();
|
||||
}
|
||||
|
||||
233
drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
Normal file
@@ -0,0 +1,233 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright 2024 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <drm/drm_exec.h>
|
||||
#include "amdgpu.h"
|
||||
|
||||
#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
|
||||
#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
|
||||
|
||||
static const char *
|
||||
amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return "amdgpu_eviction_fence";
|
||||
}
|
||||
|
||||
static const char *
|
||||
amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ef;
|
||||
|
||||
ef = container_of(f, struct amdgpu_eviction_fence, base);
|
||||
return ef->timeline_name;
|
||||
}
|
||||
|
||||
int
|
||||
amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct drm_exec *exec)
|
||||
{
|
||||
struct amdgpu_eviction_fence *old_ef, *new_ef;
|
||||
struct drm_gem_object *obj;
|
||||
unsigned long index;
|
||||
int ret;
|
||||
|
||||
if (evf_mgr->ev_fence &&
|
||||
!dma_fence_is_signaled(&evf_mgr->ev_fence->base))
|
||||
return 0;
|
||||
/*
|
||||
* Steps to replace eviction fence:
|
||||
* * lock all objects in exec (caller)
|
||||
* * create a new eviction fence
|
||||
* * update new eviction fence in evf_mgr
|
||||
* * attach the new eviction fence to BOs
|
||||
* * release the old fence
|
||||
* * unlock the objects (caller)
|
||||
*/
|
||||
new_ef = amdgpu_eviction_fence_create(evf_mgr);
|
||||
if (!new_ef) {
|
||||
DRM_ERROR("Failed to create new eviction fence\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Update the eviction fence now */
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
old_ef = evf_mgr->ev_fence;
|
||||
evf_mgr->ev_fence = new_ef;
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
|
||||
/* Attach the new fence */
|
||||
drm_exec_for_each_locked_object(exec, index, obj) {
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
|
||||
if (!bo)
|
||||
continue;
|
||||
ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to attch new eviction fence\n");
|
||||
goto free_err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free old fence */
|
||||
if (old_ef)
|
||||
dma_fence_put(&old_ef->base);
|
||||
return 0;
|
||||
|
||||
free_err:
|
||||
kfree(new_ef);
|
||||
return ret;
|
||||
}
|
||||
|
||||
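A hypothetical caller sketch for amdgpu_eviction_fence_replace_fence(): the BOs are locked with drm_exec first, then the manager's fence is swapped; names and error handling are simplified.

static int example_replace_ev_fence(struct amdgpu_fpriv *fpriv,
				    struct drm_gem_object **objs, unsigned int count)
{
	struct drm_exec exec;
	unsigned int i;
	int r = 0;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, count);
	drm_exec_until_all_locked(&exec) {
		for (i = 0; i < count; i++) {
			r = drm_exec_lock_obj(&exec, objs[i]);
			drm_exec_retry_on_contention(&exec);
			if (r)
				goto out;
		}
	}
	r = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
out:
	drm_exec_fini(&exec);
	return r;
}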
static void
|
||||
amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
|
||||
struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
|
||||
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
ev_fence = evf_mgr->ev_fence;
|
||||
if (!ev_fence)
|
||||
goto unlock;
|
||||
|
||||
amdgpu_userq_evict(uq_mgr, ev_fence);
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
}
|
||||
|
||||
static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr;
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
if (!f)
|
||||
return true;
|
||||
|
||||
ev_fence = to_ev_fence(f);
|
||||
evf_mgr = ev_fence->evf_mgr;
|
||||
|
||||
schedule_delayed_work(&evf_mgr->suspend_work, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
|
||||
.use_64bit_seqno = true,
|
||||
.get_driver_name = amdgpu_eviction_fence_get_driver_name,
|
||||
.get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
|
||||
.enable_signaling = amdgpu_eviction_fence_enable_signaling,
|
||||
};
|
||||
|
||||
void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_eviction_fence *ev_fence)
|
||||
{
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
dma_fence_signal(&ev_fence->base);
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
}
|
||||
|
||||
struct amdgpu_eviction_fence *
|
||||
amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
|
||||
if (!ev_fence)
|
||||
return NULL;
|
||||
|
||||
ev_fence->evf_mgr = evf_mgr;
|
||||
get_task_comm(ev_fence->timeline_name, current);
|
||||
spin_lock_init(&ev_fence->lock);
|
||||
dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops,
|
||||
&ev_fence->lock, evf_mgr->ev_fence_ctx,
|
||||
atomic_inc_return(&evf_mgr->ev_fence_seq));
|
||||
return ev_fence;
|
||||
}
|
||||
|
||||
void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
/* Wait for any pending work to execute */
|
||||
flush_delayed_work(&evf_mgr->suspend_work);
|
||||
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
ev_fence = evf_mgr->ev_fence;
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
|
||||
if (!ev_fence)
|
||||
return;
|
||||
|
||||
dma_fence_wait(&ev_fence->base, false);
|
||||
|
||||
/* Last unref of ev_fence */
|
||||
dma_fence_put(&evf_mgr->ev_fence->base);
|
||||
}
|
||||
|
||||
int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_bo *bo)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
struct dma_resv *resv = bo->tbo.base.resv;
|
||||
int ret;
|
||||
|
||||
if (!resv)
|
||||
return 0;
|
||||
|
||||
ret = dma_resv_reserve_fences(resv, 1);
|
||||
if (ret) {
|
||||
DRM_DEBUG_DRIVER("Failed to resv fence space\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
ev_fence = evf_mgr->ev_fence;
|
||||
if (ev_fence)
|
||||
dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_bo *bo)
|
||||
{
|
||||
struct dma_fence *stub = dma_fence_get_stub();
|
||||
|
||||
dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
|
||||
stub, DMA_RESV_USAGE_BOOKKEEP);
|
||||
dma_fence_put(stub);
|
||||
}
|
||||
|
||||
int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
{
|
||||
/* This needs to be done one time per open */
|
||||
atomic_set(&evf_mgr->ev_fence_seq, 0);
|
||||
evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
|
||||
spin_lock_init(&evf_mgr->ev_fence_lock);
|
||||
|
||||
INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
|
||||
return 0;
|
||||
}
|
||||
69
drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AMDGPU_EV_FENCE_H_
#define AMDGPU_EV_FENCE_H_

struct amdgpu_eviction_fence {
struct dma_fence base;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
struct amdgpu_eviction_fence_mgr *evf_mgr;
};

struct amdgpu_eviction_fence_mgr {
u64 ev_fence_ctx;
atomic_t ev_fence_seq;
spinlock_t ev_fence_lock;
struct amdgpu_eviction_fence *ev_fence;
struct delayed_work suspend_work;
uint8_t fd_closing;
};

/* Eviction fence helper functions */
|
||||
struct amdgpu_eviction_fence *
|
||||
amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
|
||||
|
||||
void
|
||||
amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
|
||||
|
||||
int
|
||||
amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_bo *bo);
|
||||
|
||||
void
|
||||
amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_bo *bo);
|
||||
|
||||
int
|
||||
amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
|
||||
|
||||
void
|
||||
amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct amdgpu_eviction_fence *ev_fence);
|
||||
|
||||
int
|
||||
amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
|
||||
struct drm_exec *exec);
|
||||
#endif
|
||||
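For orientation, the intended per-fd lifecycle of this manager, pieced together from the callers elsewhere in this series (simplified):

/*
 * open:      amdgpu_eviction_fence_init(&fpriv->evf_mgr);
 * BO open:   amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
 * BO close:  amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
 * eviction:  enable_signaling schedules suspend_work, which evicts the user
 *            queues; amdgpu_eviction_fence_replace_fence() later installs a
 *            fresh fence
 * fd close:  fpriv->evf_mgr.fd_closing = true;
 *            amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
 */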
@@ -36,6 +36,7 @@
|
||||
#include <drm/drm_exec.h>
|
||||
#include <drm/drm_gem_ttm_helper.h>
|
||||
#include <drm/ttm/ttm_tt.h>
|
||||
#include <drm/drm_syncobj.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_display.h"
|
||||
@@ -44,6 +45,114 @@
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_vm.h"
|
||||
|
||||
static int
|
||||
amdgpu_gem_add_input_fence(struct drm_file *filp,
|
||||
uint64_t syncobj_handles_array,
|
||||
uint32_t num_syncobj_handles)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
uint32_t *syncobj_handles;
|
||||
int ret, i;
|
||||
|
||||
if (!num_syncobj_handles)
|
||||
return 0;
|
||||
|
||||
syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
|
||||
sizeof(uint32_t) * num_syncobj_handles);
|
||||
if (IS_ERR(syncobj_handles))
|
||||
return PTR_ERR(syncobj_handles);
|
||||
|
||||
for (i = 0; i < num_syncobj_handles; i++) {
|
||||
|
||||
if (!syncobj_handles[i]) {
|
||||
ret = -EINVAL;
|
||||
goto free_memdup;
|
||||
}
|
||||
|
||||
ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
|
||||
if (ret)
|
||||
goto free_memdup;
|
||||
|
||||
dma_fence_wait(fence, false);
|
||||
|
||||
/* TODO: optimize async handling */
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
free_memdup:
|
||||
kfree(syncobj_handles);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_gem_update_timeline_node(struct drm_file *filp,
|
||||
uint32_t syncobj_handle,
|
||||
uint64_t point,
|
||||
struct drm_syncobj **syncobj,
|
||||
struct dma_fence_chain **chain)
|
||||
{
|
||||
if (!syncobj_handle)
|
||||
return 0;
|
||||
|
||||
/* Find the sync object */
|
||||
*syncobj = drm_syncobj_find(filp, syncobj_handle);
|
||||
if (!*syncobj)
|
||||
return -ENOENT;
|
||||
|
||||
if (!point)
|
||||
return 0;
|
||||
|
||||
/* Allocate the chain node */
|
||||
*chain = dma_fence_chain_alloc();
|
||||
if (!*chain) {
|
||||
drm_syncobj_put(*syncobj);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
amdgpu_gem_update_bo_mapping(struct drm_file *filp,
|
||||
struct amdgpu_bo_va *bo_va,
|
||||
uint32_t operation,
|
||||
uint64_t point,
|
||||
struct dma_fence *fence,
|
||||
struct drm_syncobj *syncobj,
|
||||
struct dma_fence_chain *chain)
|
||||
{
|
||||
struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_vm *vm = &fpriv->vm;
|
||||
struct dma_fence *last_update;
|
||||
|
||||
if (!syncobj)
|
||||
return;
|
||||
|
||||
/* Find the last update fence */
|
||||
switch (operation) {
|
||||
case AMDGPU_VA_OP_MAP:
|
||||
case AMDGPU_VA_OP_REPLACE:
|
||||
if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
|
||||
last_update = vm->last_update;
|
||||
else
|
||||
last_update = bo_va->last_pt_update;
|
||||
break;
|
||||
case AMDGPU_VA_OP_UNMAP:
|
||||
case AMDGPU_VA_OP_CLEAR:
|
||||
last_update = fence;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add fence to timeline */
|
||||
if (!point)
|
||||
drm_syncobj_replace_fence(syncobj, last_update);
|
||||
else
|
||||
drm_syncobj_add_point(syncobj, chain, last_update, point);
|
||||
}
|
||||
|
||||
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
|
||||
@@ -184,6 +293,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
|
||||
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
|
||||
else
|
||||
++bo_va->ref_count;
|
||||
|
||||
/* attach gfx eviction fence */
|
||||
r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
|
||||
if (r) {
|
||||
DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
|
||||
amdgpu_bo_unreserve(abo);
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_bo_unreserve(abo);
|
||||
|
||||
/* Validate and add eviction fence to DMABuf imports with dynamic
|
||||
@@ -247,6 +365,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
|
||||
amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
|
||||
|
||||
bo_va = amdgpu_vm_bo_find(vm, bo);
|
||||
if (!bo_va || --bo_va->ref_count)
|
||||
goto out_unlock;
|
||||
@@ -321,10 +442,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
|
||||
uint32_t handle, initial_domain;
|
||||
int r;
|
||||
|
||||
/* reject DOORBELLs until userspace code to use it is available */
|
||||
if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
|
||||
return -EINVAL;
|
||||
|
||||
/* reject invalid gem flags */
|
||||
if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
||||
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
|
||||
@@ -638,18 +755,23 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
|
||||
*
|
||||
* Update the bo_va directly after setting its address. Errors are not
|
||||
* vital here, so they are not reported back to userspace.
|
||||
*
|
||||
* Returns resulting fence if freed BO(s) got cleared from the PT.
|
||||
* otherwise stub fence in case of error.
|
||||
*/
|
||||
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va *bo_va,
|
||||
uint32_t operation)
|
||||
static struct dma_fence *
|
||||
amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va *bo_va,
|
||||
uint32_t operation)
|
||||
{
|
||||
struct dma_fence *fence = dma_fence_get_stub();
|
||||
int r;
|
||||
|
||||
if (!amdgpu_vm_ready(vm))
|
||||
return;
|
||||
return fence;
|
||||
|
||||
r = amdgpu_vm_clear_freed(adev, vm, NULL);
|
||||
r = amdgpu_vm_clear_freed(adev, vm, &fence);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
@@ -665,6 +787,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
|
||||
error:
|
||||
if (r && r != -ERESTARTSYS)
|
||||
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -713,6 +837,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_bo *abo;
|
||||
struct amdgpu_bo_va *bo_va;
|
||||
struct drm_syncobj *timeline_syncobj = NULL;
|
||||
struct dma_fence_chain *timeline_chain = NULL;
|
||||
struct dma_fence *fence;
|
||||
struct drm_exec exec;
|
||||
uint64_t va_flags;
|
||||
uint64_t vm_size;
|
||||
@@ -774,6 +901,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
||||
abo = NULL;
|
||||
}
|
||||
|
||||
r = amdgpu_gem_add_input_fence(filp,
|
||||
args->input_fence_syncobj_handles,
|
||||
args->num_syncobj_handles);
|
||||
if (r)
|
||||
goto error_put_gobj;
|
||||
|
||||
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
|
||||
DRM_EXEC_IGNORE_DUPLICATES, 0);
|
||||
drm_exec_until_all_locked(&exec) {
|
||||
@@ -802,6 +935,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
||||
bo_va = NULL;
|
||||
}
|
||||
|
||||
r = amdgpu_gem_update_timeline_node(filp,
|
||||
args->vm_timeline_syncobj_out,
|
||||
args->vm_timeline_point,
|
||||
&timeline_syncobj,
|
||||
&timeline_chain);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
switch (args->operation) {
|
||||
case AMDGPU_VA_OP_MAP:
|
||||
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
|
||||
@@ -827,12 +968,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm)
|
||||
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
|
||||
args->operation);
|
||||
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
|
||||
fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
|
||||
args->operation);
|
||||
|
||||
if (timeline_syncobj)
|
||||
amdgpu_gem_update_bo_mapping(filp, bo_va,
|
||||
args->operation,
|
||||
args->vm_timeline_point,
|
||||
fence, timeline_syncobj,
|
||||
timeline_chain);
|
||||
else
|
||||
dma_fence_put(fence);
|
||||
|
||||
}
|
||||
|
||||
error:
|
||||
drm_exec_fini(&exec);
|
||||
error_put_gobj:
|
||||
drm_gem_object_put(gobj);
|
||||
return r;
|
||||
}
|
||||
|
||||
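Putting this hunk together, the amdgpu_gem_va_ioctl() flow with the new syncobj plumbing is roughly:

/*
 * amdgpu_gem_add_input_fence()      - copy the syncobj handle array from
 *                                     userspace and wait on each input fence
 * amdgpu_gem_update_timeline_node() - look up vm_timeline_syncobj_out and
 *                                     preallocate a dma_fence_chain node
 * ... AMDGPU_VA_OP_MAP / UNMAP / CLEAR / REPLACE ...
 * amdgpu_gem_va_update_vm()         - now returns the page-table update fence
 * amdgpu_gem_update_bo_mapping()    - attach that fence at vm_timeline_point,
 *                                     or replace the syncobj fence when no
 *                                     point is given
 */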
@@ -33,6 +33,7 @@
|
||||
#include "amdgpu_reset.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "nvd.h"
|
||||
|
||||
/* delay 0.1 second to enable gfx off feature */
|
||||
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
|
||||
@@ -74,14 +75,15 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
int me, int pipe, int queue)
static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
int bit = 0;

bit += me * adev->gfx.me.num_pipe_per_me
* adev->gfx.me.num_queue_per_pipe;
bit += pipe * adev->gfx.me.num_queue_per_pipe;
* num_queue_per_pipe;
bit += pipe * num_queue_per_pipe;
bit += queue;

return bit;
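A quick worked example of the reworked bit layout, assuming two pipes per ME:

/*
 * num_queue_per_pipe = 1, num_pipe_per_me = 2:
 *   (me 0, pipe 0, queue 0) -> bit = 0*2*1 + 0*1 + 0 = 0
 *   (me 0, pipe 1, queue 0) -> bit = 0*2*1 + 1*1 + 0 = 1
 *   (me 1, pipe 0, queue 0) -> bit = 1*2*1 + 0*1 + 0 = 2
 * i.e. exactly one KGQ bit per pipe, independent of the hardware's
 * queue-per-pipe count.
 */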
@@ -238,8 +240,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, queue, pipe;
|
||||
bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
|
||||
int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
|
||||
adev->gfx.me.num_queue_per_pipe;
|
||||
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
|
||||
int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
|
||||
|
||||
if (multipipe_policy) {
|
||||
/* policy: amdgpu owns the first queue per pipe at this stage
|
||||
@@ -247,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
|
||||
for (i = 0; i < max_queues_per_me; i++) {
|
||||
pipe = i % adev->gfx.me.num_pipe_per_me;
|
||||
queue = (i / adev->gfx.me.num_pipe_per_me) %
|
||||
adev->gfx.me.num_queue_per_pipe;
|
||||
num_queue_per_pipe;
|
||||
|
||||
set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
|
||||
set_bit(pipe * num_queue_per_pipe + queue,
|
||||
adev->gfx.me.queue_bitmap);
|
||||
}
|
||||
} else {
|
||||
@@ -258,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
/* update the number of active graphics rings */
|
||||
adev->gfx.num_gfx_rings =
|
||||
bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
|
||||
if (adev->gfx.num_gfx_rings)
|
||||
adev->gfx.num_gfx_rings =
|
||||
bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
|
||||
}
|
||||
|
||||
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
|
||||
@@ -1351,6 +1354,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
int mode;
|
||||
|
||||
/* Only minimal precaution taken to reject requests while in reset.*/
|
||||
if (amdgpu_in_reset(adev))
|
||||
return -EPERM;
|
||||
|
||||
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
|
||||
AMDGPU_XCP_FL_NONE);
|
||||
|
||||
@@ -1394,8 +1401,14 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Don't allow a switch while under reset */
|
||||
if (!down_read_trylock(&adev->reset_domain->sem))
|
||||
return -EPERM;
|
||||
|
||||
ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
|
||||
|
||||
up_read(&adev->reset_domain->sem);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -1466,6 +1479,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
|
||||
goto err;
|
||||
|
||||
job->enforce_isolation = true;
|
||||
/* always run the cleaner shader */
|
||||
job->run_cleaner_shader = true;
|
||||
|
||||
ib = &job->ibs[0];
|
||||
for (i = 0; i <= ring->funcs->align_mask; ++i)
|
||||
@@ -1552,6 +1567,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
|
||||
if (adev->in_suspend && !adev->in_runpm)
|
||||
return -EPERM;
|
||||
|
||||
if (adev->gfx.disable_kq)
|
||||
return -EPERM;
|
||||
|
||||
ret = kstrtol(buf, 0, &value);
|
||||
|
||||
if (ret)
|
||||
@@ -1594,7 +1612,8 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
|
||||
* Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
|
||||
* feature for each GPU partition. Reading from the 'enforce_isolation'
|
||||
* sysfs file returns the isolation settings for all partitions, where '0'
|
||||
* indicates disabled and '1' indicates enabled.
|
||||
* indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode,
|
||||
* and '3' indicates enabled without cleaner shader.
|
||||
*
|
||||
* Return: The number of bytes read from the sysfs file.
|
||||
*/
|
||||
@@ -1629,9 +1648,12 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
|
||||
* @count: The size of the input data
|
||||
*
|
||||
* This function allows control over the 'enforce_isolation' feature, which
|
||||
* serializes access to the graphics engine. Writing '1' or '0' to the
|
||||
* 'enforce_isolation' sysfs file enables or disables process isolation for
|
||||
* each partition. The input should specify the setting for all partitions.
|
||||
* serializes access to the graphics engine. Writing '0' to disable, '1' to
|
||||
* enable isolation with cleaner shader, '2' to enable legacy isolation without
|
||||
* cleaner shader, or '3' to enable process isolation without submitting the
|
||||
* cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode
|
||||
* for each partition. The input should specify the setting for all
|
||||
* partitions.
|
||||
*
|
||||
* Return: The number of bytes written to the sysfs file.
|
||||
*/
|
||||
@@ -1668,13 +1690,34 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
if (partition_values[i] != 0 && partition_values[i] != 1)
|
||||
if (partition_values[i] != 0 &&
|
||||
partition_values[i] != 1 &&
|
||||
partition_values[i] != 2 &&
|
||||
partition_values[i] != 3)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&adev->enforce_isolation_mutex);
|
||||
for (i = 0; i < num_partitions; i++)
|
||||
adev->enforce_isolation[i] = partition_values[i];
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
switch (partition_values[i]) {
|
||||
case 0:
|
||||
default:
|
||||
adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
|
||||
break;
|
||||
case 1:
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE;
|
||||
break;
|
||||
case 2:
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
|
||||
break;
|
||||
case 3:
|
||||
adev->enforce_isolation[i] =
|
||||
AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&adev->enforce_isolation_mutex);
|
||||
|
||||
amdgpu_mes_update_enforce_isolation(adev);
|
||||
@@ -1923,39 +1966,41 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
|
||||
static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
|
||||
bool enable)
|
||||
{
|
||||
mutex_lock(&adev->gfx.kfd_sch_mutex);
|
||||
mutex_lock(&adev->gfx.userq_sch_mutex);
|
||||
|
||||
if (enable) {
|
||||
/* If the count is already 0, it means there's an imbalance bug somewhere.
|
||||
* Note that the bug may be in a different caller than the one which triggers the
|
||||
* WARN_ON_ONCE.
|
||||
*/
|
||||
if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
|
||||
if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
|
||||
dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
adev->gfx.kfd_sch_req_count[idx]--;
|
||||
adev->gfx.userq_sch_req_count[idx]--;
|
||||
|
||||
if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
|
||||
adev->gfx.kfd_sch_inactive[idx]) {
|
||||
if (adev->gfx.userq_sch_req_count[idx] == 0 &&
|
||||
adev->gfx.userq_sch_inactive[idx]) {
|
||||
schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
|
||||
msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
|
||||
}
|
||||
} else {
|
||||
if (adev->gfx.kfd_sch_req_count[idx] == 0) {
|
||||
if (adev->gfx.userq_sch_req_count[idx] == 0) {
|
||||
cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
|
||||
if (!adev->gfx.kfd_sch_inactive[idx]) {
|
||||
amdgpu_amdkfd_stop_sched(adev, idx);
|
||||
adev->gfx.kfd_sch_inactive[idx] = true;
|
||||
if (!adev->gfx.userq_sch_inactive[idx]) {
|
||||
amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
|
||||
if (adev->kfd.init_complete)
|
||||
amdgpu_amdkfd_stop_sched(adev, idx);
|
||||
adev->gfx.userq_sch_inactive[idx] = true;
|
||||
}
|
||||
}
|
||||
|
||||
adev->gfx.kfd_sch_req_count[idx]++;
|
||||
adev->gfx.userq_sch_req_count[idx]++;
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&adev->gfx.kfd_sch_mutex);
|
||||
mutex_unlock(&adev->gfx.userq_sch_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2000,12 +2045,13 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
|
||||
msecs_to_jiffies(1));
|
||||
} else {
|
||||
/* Tell KFD to resume the runqueue */
|
||||
if (adev->kfd.init_complete) {
|
||||
WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
|
||||
WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
|
||||
WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
|
||||
WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
|
||||
|
||||
amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
|
||||
if (adev->kfd.init_complete)
|
||||
amdgpu_amdkfd_start_sched(adev, idx);
|
||||
adev->gfx.kfd_sch_inactive[idx] = false;
|
||||
}
|
||||
adev->gfx.userq_sch_inactive[idx] = false;
|
||||
}
|
||||
mutex_unlock(&adev->enforce_isolation_mutex);
|
||||
}
|
||||
@@ -2029,7 +2075,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
|
||||
bool wait = false;
|
||||
|
||||
mutex_lock(&adev->enforce_isolation_mutex);
|
||||
if (adev->enforce_isolation[idx]) {
|
||||
if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
|
||||
/* set the initial values if nothing is set */
|
||||
if (!adev->gfx.enforce_isolation_jiffies[idx]) {
|
||||
adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
|
||||
@@ -2096,7 +2142,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
|
||||
amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
|
||||
|
||||
mutex_lock(&adev->enforce_isolation_mutex);
|
||||
if (adev->enforce_isolation[idx]) {
|
||||
if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
|
||||
if (adev->kfd.init_complete)
|
||||
sched_work = true;
|
||||
}
|
||||
@@ -2133,7 +2179,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
|
||||
return;
|
||||
|
||||
mutex_lock(&adev->enforce_isolation_mutex);
|
||||
if (adev->enforce_isolation[idx]) {
|
||||
if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
|
||||
if (adev->kfd.init_complete)
|
||||
sched_work = true;
|
||||
}
|
||||
@@ -2217,6 +2263,74 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
|
||||
schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_csb_preamble_start - Set CSB preamble start
|
||||
*
|
||||
* @buffer: This is an output variable that gets the PACKET3 preamble setup.
|
||||
*
|
||||
* Return:
|
||||
* return the latest index.
|
||||
*/
|
||||
u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_csb_data_parser - Parser CS data
|
||||
*
|
||||
* @adev: amdgpu_device pointer used to get the CS data and other gfx info.
|
||||
* @buffer: This is an output variable that gets the PACKET3 preamble end.
|
||||
* @count: Index to start set the preemble end.
|
||||
*
|
||||
* Return:
|
||||
* return the latest index.
|
||||
*/
|
||||
u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count)
|
||||
{
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 i;
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
|
||||
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_csb_preamble_end - Set CSB preamble end
|
||||
*
|
||||
* @buffer: This is an output variable that gets the PACKET3 preamble end.
|
||||
* @count: Index to start set the preemble end.
|
||||
*/
|
||||
void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count)
|
||||
{
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* debugfs for to enable/disable gfx job submission to specific core.
|
||||
*/
|
||||
|
||||
@@ -170,10 +170,46 @@ struct amdgpu_kiq {
|
||||
#define AMDGPU_GFX_MAX_SE 4
|
||||
#define AMDGPU_GFX_MAX_SH_PER_SE 2
|
||||
|
||||
/**
|
||||
* amdgpu_rb_config - Configure a single Render Backend (RB)
|
||||
*
|
||||
* Bad RBs are fused off and there is a harvest register the driver reads to
|
||||
* determine which RB(s) are fused off so that the driver can configure the
|
||||
* hardware state so that nothing gets sent to them. There are also user
|
||||
* harvest registers that the driver can program to disable additional RBs,
|
||||
* etc., for testing purposes.
|
||||
*/
|
||||
struct amdgpu_rb_config {
|
||||
/**
|
||||
* @rb_backend_disable:
|
||||
*
|
||||
* The value captured from register RB_BACKEND_DISABLE indicates if the
|
||||
* RB backend is disabled or not.
|
||||
*/
|
||||
uint32_t rb_backend_disable;
|
||||
|
||||
/**
|
||||
* @user_rb_backend_disable:
|
||||
*
|
||||
* The value captured from register USER_RB_BACKEND_DISABLE indicates
|
||||
* if the User RB backend is disabled or not.
|
||||
*/
|
||||
uint32_t user_rb_backend_disable;
|
||||
|
||||
/**
|
||||
* @raster_config:
|
||||
*
|
||||
* To set up all of the states, it is necessary to have two registers
|
||||
* to keep all of the states. This field holds the first register.
|
||||
*/
|
||||
uint32_t raster_config;
|
||||
|
||||
/**
|
||||
* @raster_config_1:
|
||||
*
|
||||
* To set up all of the states, it is necessary to have two registers
|
||||
* to keep all of the states. This field holds the second register.
|
||||
*/
|
||||
uint32_t raster_config_1;
|
||||
};
|
||||
|
||||
@@ -221,6 +257,13 @@ struct amdgpu_gfx_config {
|
||||
uint32_t macrotile_mode_array[16];
|
||||
|
||||
struct gb_addr_config gb_addr_config_fields;
|
||||
|
||||
/**
|
||||
* @rb_config:
|
||||
*
|
||||
* Matrix that keeps all the Render Backend (color and depth buffer
|
||||
* handling) configuration on the 3D engine.
|
||||
*/
|
||||
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
|
||||
|
||||
/* gfx configure feature */
|
||||
@@ -305,7 +348,8 @@ struct amdgpu_gfx_funcs {
|
||||
void (*init_spm_golden)(struct amdgpu_device *adev);
|
||||
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
|
||||
int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
|
||||
struct amdgpu_gfx_shadow_info *shadow_info);
|
||||
struct amdgpu_gfx_shadow_info *shadow_info,
|
||||
bool skip_check);
|
||||
enum amdgpu_gfx_partition
|
||||
(*query_partition_mode)(struct amdgpu_device *adev);
|
||||
int (*switch_partition_mode)(struct amdgpu_device *adev,
|
||||
@@ -474,9 +518,9 @@ struct amdgpu_gfx {
|
||||
bool enable_cleaner_shader;
|
||||
struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
|
||||
/* Mutex for synchronizing KFD scheduler operations */
|
||||
struct mutex kfd_sch_mutex;
|
||||
u64 kfd_sch_req_count[MAX_XCP];
|
||||
bool kfd_sch_inactive[MAX_XCP];
|
||||
struct mutex userq_sch_mutex;
|
||||
u64 userq_sch_req_count[MAX_XCP];
|
||||
bool userq_sch_inactive[MAX_XCP];
|
||||
unsigned long enforce_isolation_jiffies[MAX_XCP];
|
||||
unsigned long enforce_isolation_time[MAX_XCP];
|
||||
|
||||
@@ -484,6 +528,9 @@ struct amdgpu_gfx {
|
||||
struct delayed_work idle_work;
|
||||
bool workload_profile_active;
|
||||
struct mutex workload_profile_mutex;
|
||||
|
||||
bool disable_kq;
|
||||
bool disable_uq;
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_ras_reg_entry {
|
||||
@@ -503,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry {
|
||||
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
|
||||
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
|
||||
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
|
||||
#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
|
||||
#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_create_bitmask - create a bitmask
|
||||
@@ -550,8 +597,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
|
||||
int pipe, int queue);
|
||||
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
|
||||
int pipe, int queue);
|
||||
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
|
||||
@@ -597,6 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
|
||||
void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
|
||||
void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
|
||||
void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
|
||||
u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer);
|
||||
u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count);
|
||||
void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count);
|
||||
|
||||
void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
|
||||
void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
|
||||
|
||||
@@ -1230,6 +1230,10 @@ static ssize_t current_memory_partition_show(
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
enum amdgpu_memory_partition mode;
|
||||
|
||||
/* Only minimal precaution taken to reject requests while in reset */
|
||||
if (amdgpu_in_reset(adev))
|
||||
return -EPERM;
|
||||
|
||||
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
|
||||
if ((mode >= ARRAY_SIZE(nps_desc)) ||
|
||||
(BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
|
||||
{
|
||||
@@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
|
||||
/* hdp ras follows amdgpu_ras_block_late_init_default for late init */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32((adev->rmmio_remap.reg_offset +
|
||||
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
|
||||
2,
|
||||
0);
|
||||
if (adev->nbio.funcs->get_memsize)
|
||||
adev->nbio.funcs->get_memsize(adev);
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring,
|
||||
(adev->rmmio_remap.reg_offset +
|
||||
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
|
||||
2,
|
||||
0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,4 +44,6 @@ struct amdgpu_hdp {
|
||||
};
|
||||
|
||||
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
|
||||
void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
#endif /* __AMDGPU_HDP_H__ */
|
||||
|
||||
@@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
|
||||
init_shadow = false;
|
||||
}
|
||||
|
||||
if (!ring->sched.ready && !ring->is_mes_queue) {
|
||||
if (!ring->sched.ready) {
|
||||
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (vm && !job->vmid && !ring->is_mes_queue) {
|
||||
if (vm && !job->vmid) {
|
||||
dev_err(adev->dev, "VM IB without ID\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -576,8 +576,16 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
|
||||
INIT_LIST_HEAD(&id_mgr->ids_lru);
|
||||
id_mgr->reserved_use_count = 0;
|
||||
|
||||
/* manage only VMIDs not used by KFD */
|
||||
id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
|
||||
/* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
|
||||
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
|
||||
/* manage only VMIDs not used by KFD */
|
||||
id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
|
||||
else if (AMDGPU_IS_MMHUB0(i) ||
|
||||
AMDGPU_IS_MMHUB1(i))
|
||||
id_mgr->num_ids = 16;
|
||||
else
|
||||
/* manage only VMIDs not used by KFD */
|
||||
id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
|
||||
|
||||
/* skip over VMID 0, since it is the system VM */
|
||||
for (j = 1; j < id_mgr->num_ids; ++j) {
|
||||
@@ -588,7 +596,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
|
||||
}
|
||||
/* alloc a default reserved vmid to enforce isolation */
|
||||
for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
|
||||
if (adev->enforce_isolation[i])
|
||||
if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE)
|
||||
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ih.h"
|
||||
#include "amdgpu_reset.h"
|
||||
|
||||
/**
|
||||
* amdgpu_ih_ring_init - initialize the IH state
|
||||
@@ -227,13 +228,23 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
|
||||
ih->rptr &= ih->ptr_mask;
|
||||
}
|
||||
|
||||
amdgpu_ih_set_rptr(adev, ih);
|
||||
if (!ih->overflow)
|
||||
amdgpu_ih_set_rptr(adev, ih);
|
||||
|
||||
wake_up_all(&ih->wait_process);
|
||||
|
||||
/* make sure wptr hasn't changed while processing */
|
||||
wptr = amdgpu_ih_get_wptr(adev, ih);
|
||||
if (wptr != ih->rptr)
|
||||
goto restart_ih;
|
||||
if (!ih->overflow)
|
||||
goto restart_ih;
|
||||
|
||||
if (ih->overflow)
|
||||
if (amdgpu_sriov_runtime(adev))
|
||||
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
|
||||
&adev->virt.flr_work),
|
||||
"Failed to queue work! at %s",
|
||||
__func__);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ struct amdgpu_ih_ring {
|
||||
/* For waiting on IH processing at checkpoint. */
|
||||
wait_queue_head_t wait_process;
|
||||
uint64_t processed_timestamp;
|
||||
bool overflow;
|
||||
};
|
||||
|
||||
/* return true if time stamp t2 is after t1 with 48bit wrap around */
|
||||
|
||||
@@ -78,6 +78,7 @@ struct amdgpu_job {
|
||||
|
||||
/* enforce isolation */
|
||||
bool enforce_isolation;
|
||||
bool run_cleaner_shader;
|
||||
|
||||
uint32_t num_ibs;
|
||||
struct amdgpu_ib ibs[];
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "amd_pcie.h"
|
||||
#include "amdgpu_userq.h"
|
||||
|
||||
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
|
||||
{
|
||||
@@ -370,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
|
||||
struct drm_amdgpu_info *info,
|
||||
struct drm_amdgpu_info_uq_metadata_gfx *meta)
|
||||
{
|
||||
int ret = -EOPNOTSUPP;
|
||||
|
||||
if (adev->gfx.funcs->get_gfx_shadow_info) {
|
||||
struct amdgpu_gfx_shadow_info shadow = {};
|
||||
|
||||
adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
|
||||
meta->shadow_size = shadow.shadow_size;
|
||||
meta->shadow_alignment = shadow.shadow_alignment;
|
||||
meta->csa_size = shadow.csa_size;
|
||||
meta->csa_alignment = shadow.csa_alignment;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
struct drm_amdgpu_info *info,
|
||||
struct drm_amdgpu_info_hw_ip *result)
|
||||
@@ -387,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_GFX:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
if (adev->gfx.gfx_ring[i].sched.ready)
|
||||
if (adev->gfx.gfx_ring[i].sched.ready &&
|
||||
!adev->gfx.gfx_ring[i].no_user_submission)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
@@ -395,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_COMPUTE:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
if (adev->gfx.compute_ring[i].sched.ready)
|
||||
if (adev->gfx.compute_ring[i].sched.ready &&
|
||||
!adev->gfx.compute_ring[i].no_user_submission)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
@@ -403,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_DMA:
|
||||
type = AMD_IP_BLOCK_TYPE_SDMA;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
if (adev->sdma.instance[i].ring.sched.ready)
|
||||
if (adev->sdma.instance[i].ring.sched.ready &&
|
||||
!adev->sdma.instance[i].ring.no_user_submission)
|
||||
++num_rings;
|
||||
ib_start_alignment = 256;
|
||||
ib_size_alignment = 4;
|
||||
@@ -414,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->uvd.inst[i].ring.sched.ready)
|
||||
if (adev->uvd.inst[i].ring.sched.ready &&
|
||||
!adev->uvd.inst[i].ring.no_user_submission)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 256;
|
||||
@@ -423,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
case AMDGPU_HW_IP_VCE:
|
||||
type = AMD_IP_BLOCK_TYPE_VCE;
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
if (adev->vce.ring[i].sched.ready)
|
||||
if (adev->vce.ring[i].sched.ready &&
|
||||
!adev->vce.ring[i].no_user_submission)
|
||||
++num_rings;
|
||||
ib_start_alignment = 256;
|
||||
ib_size_alignment = 4;
|
||||
@@ -435,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
continue;
|
||||
|
||||
for (j = 0; j < adev->uvd.num_enc_rings; j++)
|
||||
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
|
||||
if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
|
||||
!adev->uvd.inst[i].ring_enc[j].no_user_submission)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 256;
|
||||
@@ -447,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
if (adev->vcn.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->vcn.inst[i].ring_dec.sched.ready)
|
||||
if (adev->vcn.inst[i].ring_dec.sched.ready &&
|
||||
!adev->vcn.inst[i].ring_dec.no_user_submission)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 256;
|
||||
@@ -460,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
continue;
|
||||
|
||||
for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
|
||||
if (adev->vcn.inst[i].ring_enc[j].sched.ready)
|
||||
if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
|
||||
!adev->vcn.inst[i].ring_enc[j].no_user_submission)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 256;
|
||||
@@ -475,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
continue;
|
||||
|
||||
for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
|
||||
if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
|
||||
if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
|
||||
!adev->jpeg.inst[i].ring_dec[j].no_user_submission)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 256;
|
||||
@@ -483,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
||||
break;
|
||||
case AMDGPU_HW_IP_VPE:
|
||||
type = AMD_IP_BLOCK_TYPE_VPE;
|
||||
if (adev->vpe.ring.sched.ready)
|
||||
if (adev->vpe.ring.sched.ready &&
|
||||
!adev->vpe.ring.no_user_submission)
|
||||
++num_rings;
|
||||
ib_start_alignment = 256;
|
||||
ib_size_alignment = 4;
|
||||
@@ -978,6 +1009,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
}
|
||||
}
|
||||
|
||||
dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
|
||||
|
||||
ret = copy_to_user(out, dev_info,
|
||||
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
|
||||
kfree(dev_info);
|
||||
@@ -1293,6 +1326,22 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
return copy_to_user(out, &gpuvm_fault,
|
||||
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
|
||||
}
|
||||
case AMDGPU_INFO_UQ_FW_AREAS: {
|
||||
struct drm_amdgpu_info_uq_metadata meta_info = {};
|
||||
|
||||
switch (info->query_hw_ip.type) {
|
||||
case AMDGPU_HW_IP_GFX:
|
||||
ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = copy_to_user(out, &meta_info,
|
||||
min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
default:
|
||||
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
|
||||
return -EINVAL;
|
||||
@@ -1376,8 +1425,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
||||
mutex_init(&fpriv->bo_list_lock);
|
||||
idr_init_base(&fpriv->bo_list_handles, 1);
|
||||
|
||||
r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
|
||||
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
|
||||
|
||||
r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
|
||||
if (r)
|
||||
DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
|
||||
|
||||
file_priv->driver_priv = fpriv;
|
||||
goto out_suspend;
|
||||
|
||||
@@ -1445,6 +1502,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
|
||||
amdgpu_bo_unreserve(pd);
|
||||
}
|
||||
|
||||
fpriv->evf_mgr.fd_closing = true;
|
||||
amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
|
||||
amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
|
||||
|
||||
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -111,8 +111,8 @@ struct amdgpu_mes {
|
||||
|
||||
uint32_t vmid_mask_gfxhub;
|
||||
uint32_t vmid_mask_mmhub;
|
||||
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
|
||||
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
|
||||
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
|
||||
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
|
||||
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
|
||||
uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
|
||||
@@ -149,19 +149,6 @@ struct amdgpu_mes {
|
||||
|
||||
};
|
||||
|
||||
struct amdgpu_mes_process {
|
||||
int pasid;
|
||||
struct amdgpu_vm *vm;
|
||||
uint64_t pd_gpu_addr;
|
||||
struct amdgpu_bo *proc_ctx_bo;
|
||||
uint64_t proc_ctx_gpu_addr;
|
||||
void *proc_ctx_cpu_ptr;
|
||||
uint64_t process_quantum;
|
||||
struct list_head gang_list;
|
||||
uint32_t doorbell_index;
|
||||
struct mutex doorbell_lock;
|
||||
};
|
||||
|
||||
struct amdgpu_mes_gang {
|
||||
int gang_id;
|
||||
int priority;
|
||||
@@ -248,18 +235,6 @@ struct mes_remove_queue_input {
|
||||
uint64_t gang_context_addr;
|
||||
};
|
||||
|
||||
struct mes_reset_queue_input {
|
||||
uint32_t doorbell_offset;
|
||||
uint64_t gang_context_addr;
|
||||
bool use_mmio;
|
||||
uint32_t queue_type;
|
||||
uint32_t me_id;
|
||||
uint32_t pipe_id;
|
||||
uint32_t queue_id;
|
||||
uint32_t xcc_id;
|
||||
uint32_t vmid;
|
||||
};
|
||||
|
||||
struct mes_map_legacy_queue_input {
|
||||
uint32_t queue_type;
|
||||
uint32_t doorbell_offset;
|
||||
@@ -291,7 +266,7 @@ struct mes_resume_gang_input {
|
||||
uint64_t gang_context_addr;
|
||||
};
|
||||
|
||||
struct mes_reset_legacy_queue_input {
|
||||
struct mes_reset_queue_input {
|
||||
uint32_t queue_type;
|
||||
uint32_t doorbell_offset;
|
||||
bool use_mmio;
|
||||
@@ -301,6 +276,8 @@ struct mes_reset_legacy_queue_input {
|
||||
uint64_t mqd_addr;
|
||||
uint64_t wptr_addr;
|
||||
uint32_t vmid;
|
||||
bool legacy_gfx;
|
||||
bool is_kq;
|
||||
};
|
||||
|
||||
enum mes_misc_opcode {
|
||||
@@ -388,9 +365,6 @@ struct amdgpu_mes_funcs {
|
||||
int (*misc_op)(struct amdgpu_mes *mes,
|
||||
struct mes_misc_op_input *input);
|
||||
|
||||
int (*reset_legacy_queue)(struct amdgpu_mes *mes,
|
||||
struct mes_reset_legacy_queue_input *input);
|
||||
|
||||
int (*reset_hw_queue)(struct amdgpu_mes *mes,
|
||||
struct mes_reset_queue_input *input);
|
||||
};
|
||||
@@ -404,26 +378,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
|
||||
int amdgpu_mes_init(struct amdgpu_device *adev);
|
||||
void amdgpu_mes_fini(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
|
||||
struct amdgpu_vm *vm);
|
||||
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
|
||||
|
||||
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
|
||||
struct amdgpu_mes_gang_properties *gprops,
|
||||
int *gang_id);
|
||||
int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
|
||||
|
||||
int amdgpu_mes_suspend(struct amdgpu_device *adev);
|
||||
int amdgpu_mes_resume(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
|
||||
struct amdgpu_mes_queue_properties *qprops,
|
||||
int *queue_id);
|
||||
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
|
||||
int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id);
|
||||
int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
|
||||
int me_id, int pipe_id, int queue_id, int vmid);
|
||||
|
||||
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
|
||||
@@ -451,27 +408,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
|
||||
bool trap_en);
|
||||
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
|
||||
uint64_t process_context_addr);
|
||||
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
|
||||
int queue_type, int idx,
|
||||
struct amdgpu_mes_ctx_data *ctx_data,
|
||||
struct amdgpu_ring **out);
|
||||
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
|
||||
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
|
||||
enum amdgpu_mes_priority_level prio);
|
||||
|
||||
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
|
||||
struct amdgpu_mes_ctx_data *ctx_data);
|
||||
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
|
||||
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
struct amdgpu_mes_ctx_data *ctx_data);
|
||||
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
|
||||
struct amdgpu_mes_ctx_data *ctx_data);
|
||||
|
||||
int amdgpu_mes_self_test(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
|
||||
@@ -1644,7 +1644,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
|
||||
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
|
||||
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
|
||||
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
|
||||
|
||||
/* Add the gem obj resv fence dump*/
|
||||
if (dma_resv_trylock(bo->tbo.base.resv)) {
|
||||
dma_resv_describe(bo->tbo.base.resv, m);
|
||||
dma_resv_unlock(bo->tbo.base.resv);
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return size;
|
||||
|
||||
@@ -2214,7 +2214,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
|
||||
|
||||
if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
|
||||
!psp->securedisplay_context.context.bin_desc.start_addr) {
|
||||
dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
|
||||
dev_info(psp->adev->dev,
|
||||
"SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ enum psp_reg_prog_id {
|
||||
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
|
||||
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
|
||||
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
|
||||
PSP_REG_MMHUB_L1_TLB_CNTL = 25,
|
||||
PSP_REG_LAST
|
||||
};
|
||||
|
||||
@@ -142,6 +143,8 @@ struct psp_funcs {
|
||||
bool (*get_ras_capability)(struct psp_context *psp);
|
||||
bool (*is_aux_sos_load_required)(struct psp_context *psp);
|
||||
bool (*is_reload_needed)(struct psp_context *psp);
|
||||
int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
|
||||
enum psp_reg_prog_id id);
|
||||
};
|
||||
|
||||
struct ta_funcs {
|
||||
@@ -475,6 +478,10 @@ struct amdgpu_psp_funcs {
|
||||
#define psp_is_aux_sos_load_required(psp) \
|
||||
((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
|
||||
|
||||
#define psp_reg_program_no_ring(psp, val, id) \
|
||||
((psp)->funcs->reg_program_no_ring ? \
|
||||
(psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
|
||||
|
||||
extern const struct amd_ip_funcs psp_ip_funcs;
|
||||
|
||||
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
|
||||
@@ -569,5 +576,8 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
|
||||
int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
|
||||
bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
|
||||
bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
|
||||
int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
|
||||
enum psp_reg_prog_id id);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1498,6 +1498,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
|
||||
!amdgpu_ras_get_aca_debug_mode(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* skip ras error reset in gpu reset */
|
||||
if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
|
||||
((smu_funcs && smu_funcs->set_debug_mode) ||
|
||||
@@ -2161,7 +2164,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
|
||||
/* Fatal error events are handled on host side */
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
/**
|
||||
/*
|
||||
* If the current interrupt is caused by a non-fatal RAS error, skip
|
||||
* check for fatal error. For fatal errors, FED status of all devices
|
||||
* in XGMI hive gets set when the first device gets fatal error
|
||||
@@ -3793,10 +3796,12 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
|
||||
adev->ras_hw_enabled & amdgpu_ras_mask;
|
||||
|
||||
/* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
|
||||
adev->aca.is_enabled =
|
||||
(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
|
||||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
|
||||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
adev->aca.is_enabled =
|
||||
(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
|
||||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
|
||||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
|
||||
}
|
||||
|
||||
/* bad page feature is not applicable to specific app platform */
|
||||
if (adev->gmc.is_app_apu &&
|
||||
|
||||
@@ -418,6 +418,7 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control
|
||||
hdr->version = RAS_TABLE_VER_V2_1;
|
||||
return;
|
||||
case IP_VERSION(12, 0, 0):
|
||||
case IP_VERSION(12, 5, 0):
|
||||
hdr->version = RAS_TABLE_VER_V3;
|
||||
return;
|
||||
default:
|
||||
@@ -1392,17 +1393,39 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
|
||||
|
||||
__decode_table_header_from_buf(hdr, buf);
|
||||
|
||||
if (hdr->version >= RAS_TABLE_VER_V2_1) {
|
||||
if (hdr->header != RAS_TABLE_HDR_VAL &&
|
||||
hdr->header != RAS_TABLE_HDR_BAD) {
|
||||
dev_info(adev->dev, "Creating a new EEPROM table");
|
||||
return amdgpu_ras_eeprom_reset_table(control);
|
||||
}
|
||||
|
||||
switch (hdr->version) {
|
||||
case RAS_TABLE_VER_V2_1:
|
||||
case RAS_TABLE_VER_V3:
|
||||
control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
|
||||
control->ras_record_offset = RAS_RECORD_START_V2_1;
|
||||
control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
|
||||
} else {
|
||||
break;
|
||||
case RAS_TABLE_VER_V1:
|
||||
control->ras_num_recs = RAS_NUM_RECS(hdr);
|
||||
control->ras_record_offset = RAS_RECORD_START;
|
||||
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev,
|
||||
"RAS header invalid, unsupported version: %u",
|
||||
hdr->version);
|
||||
return -EINVAL;
|
||||
}
|
||||
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
|
||||
|
||||
if (control->ras_num_recs > control->ras_max_record_count) {
|
||||
dev_err(adev->dev,
|
||||
"RAS header invalid, records in header: %u max allowed :%u",
|
||||
control->ras_num_recs, control->ras_max_record_count);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
|
||||
control->ras_num_mca_recs = 0;
|
||||
control->ras_num_pa_recs = 0;
|
||||
return 0;
|
||||
@@ -1413,7 +1436,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
int res;
|
||||
int res = 0;
|
||||
|
||||
if (!__is_ras_eeprom_supported(adev))
|
||||
return 0;
|
||||
@@ -1494,10 +1517,6 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
|
||||
"User defined threshold is set, runtime service will be halt when threshold is reached\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DRM_INFO("Creating a new EEPROM table");
|
||||
|
||||
res = amdgpu_ras_eeprom_reset_table(control);
|
||||
}
|
||||
|
||||
return res < 0 ? res : 0;
|
||||
|
||||
@@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
|
||||
}
|
||||
|
||||
#define amdgpu_ring_get_gpu_addr(ring, offset) \
|
||||
(ring->is_mes_queue ? \
|
||||
(ring->mes_ctx->meta_data_gpu_addr + offset) : \
|
||||
(ring->adev->wb.gpu_addr + offset * 4))
|
||||
(ring->adev->wb.gpu_addr + offset * 4)
|
||||
|
||||
#define amdgpu_ring_get_cpu_addr(ring, offset) \
|
||||
(ring->is_mes_queue ? \
|
||||
(void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
|
||||
(&ring->adev->wb.wb[offset]))
|
||||
(&ring->adev->wb.wb[offset])
|
||||
|
||||
/**
|
||||
* amdgpu_ring_init - init driver ring struct.
|
||||
@@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
ring->sched_score = sched_score;
|
||||
ring->vmid_wait = dma_fence_get_stub();
|
||||
|
||||
if (!ring->is_mes_queue) {
|
||||
ring->idx = adev->num_rings++;
|
||||
adev->rings[ring->idx] = ring;
|
||||
}
|
||||
ring->idx = adev->num_rings++;
|
||||
adev->rings[ring->idx] = ring;
|
||||
|
||||
r = amdgpu_fence_driver_init_ring(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_RPTR_OFFS);
|
||||
ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_WPTR_OFFS);
|
||||
ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_FENCE_OFFS);
|
||||
ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
|
||||
ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_COND_EXE_OFFS);
|
||||
} else {
|
||||
r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &ring->fence_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_device_wb_get(adev, &ring->fence_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
ring->fence_gpu_addr =
|
||||
@@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
ring->cached_rptr = 0;
|
||||
|
||||
/* Allocate ring buffer */
|
||||
if (ring->is_mes_queue) {
|
||||
int offset = 0;
|
||||
|
||||
BUG_ON(ring->ring_size > PAGE_SIZE*4);
|
||||
|
||||
offset = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_RING_OFFS);
|
||||
ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
amdgpu_ring_clear_ring(ring);
|
||||
|
||||
} else if (ring->ring_obj == NULL) {
|
||||
if (ring->ring_obj == NULL) {
|
||||
r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
&ring->ring_obj,
|
||||
@@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
|
||||
{
|
||||
|
||||
/* Not to finish a ring which is not initialized */
|
||||
if (!(ring->adev) ||
|
||||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
|
||||
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
|
||||
return;
|
||||
|
||||
ring->sched.ready = false;
|
||||
|
||||
if (!ring->is_mes_queue) {
|
||||
amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
|
||||
|
||||
amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->fence_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
|
||||
amdgpu_device_wb_free(ring->adev, ring->fence_offs);
|
||||
|
||||
amdgpu_bo_free_kernel(&ring->ring_obj,
|
||||
&ring->gpu_addr,
|
||||
(void **)&ring->ring);
|
||||
} else {
|
||||
kfree(ring->fence_drv.fences);
|
||||
}
|
||||
amdgpu_bo_free_kernel(&ring->ring_obj,
|
||||
&ring->gpu_addr,
|
||||
(void **)&ring->ring);
|
||||
|
||||
dma_fence_put(ring->vmid_wait);
|
||||
ring->vmid_wait = NULL;
|
||||
ring->me = 0;
|
||||
|
||||
if (!ring->is_mes_queue)
|
||||
ring->adev->rings[ring->idx] = NULL;
|
||||
ring->adev->rings[ring->idx] = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -164,8 +164,24 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
|
||||
|
||||
/* provided by hw blocks that expose a ring buffer for commands */
|
||||
struct amdgpu_ring_funcs {
|
||||
/**
|
||||
* @type:
|
||||
*
|
||||
* GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
|
||||
* use ring buffers. The type field just identifies which component the
|
||||
* ring buffer is associated with.
|
||||
*/
|
||||
enum amdgpu_ring_type type;
|
||||
uint32_t align_mask;
|
||||
|
||||
/**
|
||||
* @nop:
|
||||
*
|
||||
* Every block in the amdgpu has no-op instructions (e.g., GFX 10
|
||||
* uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
|
||||
* etc). This field receives the specific no-op for the component
|
||||
* that initializes the ring.
|
||||
*/
|
||||
u32 nop;
|
||||
bool support_64bit_ptrs;
|
||||
bool no_user_fence;
|
||||
@@ -241,6 +257,9 @@ struct amdgpu_ring_funcs {
|
||||
bool (*is_guilty)(struct amdgpu_ring *ring);
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_ring - Holds ring information
|
||||
*/
|
||||
struct amdgpu_ring {
|
||||
struct amdgpu_device *adev;
|
||||
const struct amdgpu_ring_funcs *funcs;
|
||||
@@ -252,13 +271,61 @@ struct amdgpu_ring {
|
||||
unsigned rptr_offs;
|
||||
u64 rptr_gpu_addr;
|
||||
volatile u32 *rptr_cpu_addr;
|
||||
|
||||
/**
|
||||
* @wptr:
|
||||
*
|
||||
* This is part of the Ring buffer implementation and represents the
|
||||
* write pointer. The wptr determines where the host has written.
|
||||
*/
|
||||
u64 wptr;
|
||||
|
||||
/**
|
||||
* @wptr_old:
|
||||
*
|
||||
* Before update wptr with the new value, usually the old value is
|
||||
* stored in the wptr_old.
|
||||
*/
|
||||
u64 wptr_old;
|
||||
unsigned ring_size;
|
||||
|
||||
/**
|
||||
* @max_dw:
|
||||
*
|
||||
* Maximum number of DWords for ring allocation. This information is
|
||||
* provided at the ring initialization time, and each IP block can
|
||||
* specify a specific value. Check places that invoke
|
||||
* amdgpu_ring_init() to see the maximum size per block.
|
||||
*/
|
||||
unsigned max_dw;
|
||||
|
||||
/**
|
||||
* @count_dw:
|
||||
*
|
||||
* This value starts with the maximum amount of DWords supported by the
|
||||
* ring. This value is updated based on the ring manipulation.
|
||||
*/
|
||||
int count_dw;
|
||||
uint64_t gpu_addr;
|
||||
|
||||
/**
|
||||
* @ptr_mask:
|
||||
*
|
||||
* Some IPs provide support for 64-bit pointers and others for 32-bit
|
||||
* only; this behavior is component-specific and defined by the field
|
||||
* support_64bit_ptr. If the IP block supports 64-bits, the mask
|
||||
* 0xffffffffffffffff is set; otherwise, this value assumes buf_mask.
|
||||
* Notice that this field is used to keep wptr under a valid range.
|
||||
*/
|
||||
uint64_t ptr_mask;
|
||||
|
||||
/**
|
||||
* @buf_mask:
|
||||
*
|
||||
* Buffer mask is a value used to keep wptr count under its
|
||||
* thresholding. Buffer mask initialized during the ring buffer
|
||||
* initialization time, and it is defined as (ring_size / 4) -1.
|
||||
*/
|
||||
uint32_t buf_mask;
|
||||
u32 idx;
|
||||
u32 xcc_id;
|
||||
@@ -276,6 +343,13 @@ struct amdgpu_ring {
|
||||
bool use_pollmem;
|
||||
unsigned wptr_offs;
|
||||
u64 wptr_gpu_addr;
|
||||
|
||||
/**
|
||||
* @wptr_cpu_addr:
|
||||
*
|
||||
* This is the CPU address pointer in the writeback slot. This is used
|
||||
* to commit changes to the GPU.
|
||||
*/
|
||||
volatile u32 *wptr_cpu_addr;
|
||||
unsigned fence_offs;
|
||||
u64 fence_gpu_addr;
|
||||
@@ -297,20 +371,15 @@ struct amdgpu_ring {
|
||||
struct dma_fence *vmid_wait;
|
||||
bool has_compute_vm_bug;
|
||||
bool no_scheduler;
|
||||
bool no_user_submission;
|
||||
int hw_prio;
|
||||
unsigned num_hw_submission;
|
||||
atomic_t *sched_score;
|
||||
|
||||
/* used for mes */
|
||||
bool is_mes_queue;
|
||||
uint32_t hw_queue_id;
|
||||
struct amdgpu_mes_ctx_data *mes_ctx;
|
||||
|
||||
bool is_sw_ring;
|
||||
unsigned int entry_index;
|
||||
/* store the cached rptr to restore after reset */
|
||||
uint64_t cached_rptr;
|
||||
|
||||
};
|
||||
|
||||
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
|
||||
@@ -435,15 +504,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
|
||||
ring->ring[offset] = cur - offset;
|
||||
}
|
||||
|
||||
#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
|
||||
(ring->is_mes_queue && ring->mes_ctx ? \
|
||||
(ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
|
||||
|
||||
#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \
|
||||
(ring->is_mes_queue && ring->mes_ctx ? \
|
||||
(void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
|
||||
NULL)
|
||||
|
||||
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
|
||||
|
||||
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
|
||||
|
||||
@@ -237,6 +237,20 @@ struct amdgpu_rlc_funcs {
|
||||
void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
|
||||
int (*init)(struct amdgpu_device *adev);
|
||||
u32 (*get_csb_size)(struct amdgpu_device *adev);
|
||||
|
||||
/**
|
||||
* @get_csb_buffer: Get the clear state to be put into the hardware.
|
||||
*
|
||||
* The parameter adev is used to get the CS data and other gfx info,
|
||||
* and buffer is the RLC CS pointer
|
||||
*
|
||||
* Sometimes, the user space puts a request to clear the state in the
|
||||
* command buffer; this function provides the clear state that gets put
|
||||
* into the hardware. Note that the driver programs Clear State
|
||||
* Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
|
||||
* and it also provides a pointer to it which is used by the firmware
|
||||
* to load the clear state in some cases.
|
||||
*/
|
||||
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
|
||||
int (*get_cp_table_num)(struct amdgpu_device *adev);
|
||||
int (*resume)(struct amdgpu_device *adev);
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include "amdgpu_sdma.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "gc/gc_10_1_0_offset.h"
|
||||
#include "gc/gc_10_3_0_sh_mask.h"
|
||||
|
||||
#define AMDGPU_CSA_SDMA_SIZE 64
|
||||
/* SDMA CSA reside in the 3rd page of CSA */
|
||||
@@ -76,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
|
||||
if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
|
||||
return 0;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
uint32_t offset = 0;
|
||||
r = amdgpu_sdma_get_index_from_ring(ring, &index);
|
||||
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
sdma[ring->idx].sdma_meta_data);
|
||||
csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
} else {
|
||||
r = amdgpu_sdma_get_index_from_ring(ring, &index);
|
||||
|
||||
if (r || index > 31)
|
||||
csa_mc_addr = 0;
|
||||
else
|
||||
csa_mc_addr = amdgpu_csa_vaddr(adev) +
|
||||
AMDGPU_CSA_SDMA_OFFSET +
|
||||
index * AMDGPU_CSA_SDMA_SIZE;
|
||||
}
|
||||
if (r || index > 31)
|
||||
csa_mc_addr = 0;
|
||||
else
|
||||
csa_mc_addr = amdgpu_csa_vaddr(adev) +
|
||||
AMDGPU_CSA_SDMA_OFFSET +
|
||||
index * AMDGPU_CSA_SDMA_SIZE;
|
||||
|
||||
return csa_mc_addr;
|
||||
}
|
||||
@@ -537,28 +531,38 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
|
||||
* @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
|
||||
*
|
||||
* This function allows KFD and AMDGPU to register their own callbacks for handling
|
||||
* pre-reset and post-reset operations for engine reset. These are needed because engine
|
||||
* reset will stop all queues on that engine.
|
||||
*/
|
||||
void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs)
|
||||
static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
|
||||
{
|
||||
if (!funcs)
|
||||
return;
|
||||
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
|
||||
int r = -EOPNOTSUPP;
|
||||
|
||||
/* Ensure the reset_callback_list is initialized */
|
||||
if (!adev->sdma.reset_callback_list.next) {
|
||||
INIT_LIST_HEAD(&adev->sdma.reset_callback_list);
|
||||
switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
|
||||
case IP_VERSION(4, 4, 2):
|
||||
case IP_VERSION(4, 4, 4):
|
||||
case IP_VERSION(4, 4, 5):
|
||||
/* For SDMA 4.x, use the existing DPM interface for backward compatibility */
|
||||
r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id);
|
||||
break;
|
||||
case IP_VERSION(5, 0, 0):
|
||||
case IP_VERSION(5, 0, 1):
|
||||
case IP_VERSION(5, 0, 2):
|
||||
case IP_VERSION(5, 0, 5):
|
||||
case IP_VERSION(5, 2, 0):
|
||||
case IP_VERSION(5, 2, 2):
|
||||
case IP_VERSION(5, 2, 4):
|
||||
case IP_VERSION(5, 2, 5):
|
||||
case IP_VERSION(5, 2, 6):
|
||||
case IP_VERSION(5, 2, 3):
|
||||
case IP_VERSION(5, 2, 1):
|
||||
case IP_VERSION(5, 2, 7):
|
||||
if (sdma_instance->funcs->soft_reset_kernel_queue)
|
||||
r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* Initialize the list node in the callback structure */
|
||||
INIT_LIST_HEAD(&funcs->list);
|
||||
|
||||
/* Add the callback structure to the global list */
|
||||
list_add_tail(&funcs->list, &adev->sdma.reset_callback_list);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -566,16 +570,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
|
||||
* @adev: Pointer to the AMDGPU device
|
||||
* @instance_id: ID of the SDMA engine instance to reset
|
||||
*
|
||||
* This function performs the following steps:
|
||||
* 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state.
|
||||
* 2. Resets the specified SDMA engine instance.
|
||||
* 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state.
|
||||
*
|
||||
* Returns: 0 on success, or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
|
||||
{
|
||||
struct sdma_on_reset_funcs *funcs;
|
||||
int ret = 0;
|
||||
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
|
||||
struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
|
||||
@@ -597,38 +595,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
|
||||
page_sched_stopped = true;
|
||||
}
|
||||
|
||||
/* Invoke all registered pre_reset callbacks */
|
||||
list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
|
||||
if (funcs->pre_reset) {
|
||||
ret = funcs->pre_reset(adev, instance_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"beforeReset callback failed for instance %u: %d\n",
|
||||
instance_id, ret);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sdma_instance->funcs->stop_kernel_queue)
|
||||
sdma_instance->funcs->stop_kernel_queue(gfx_ring);
|
||||
|
||||
/* Perform the SDMA reset for the specified instance */
|
||||
ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id);
|
||||
ret = amdgpu_sdma_soft_reset(adev, instance_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Invoke all registered post_reset callbacks */
|
||||
list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) {
|
||||
if (funcs->post_reset) {
|
||||
ret = funcs->post_reset(adev, instance_id);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"afterReset callback failed for instance %u: %d\n",
|
||||
instance_id, ret);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sdma_instance->funcs->start_kernel_queue)
|
||||
sdma_instance->funcs->start_kernel_queue(gfx_ring);
|
||||
|
||||
exit:
|
||||
/* Restart the scheduler's work queue for the GFX and page rings
|
||||
|
||||
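The kernel-doc above describes a three-step sequence: run the registered pre-reset callbacks, reset the engine, then run the post-reset callbacks. A minimal sketch of how a per-ring reset handler might hand off to this interface; sdma_vX_0_reset_ring() is a hypothetical name and deriving the instance from ring->me is a simplification, only amdgpu_sdma_reset_engine() itself comes from the code above.

/* Hypothetical per-ring reset handler: find the SDMA instance that owns the
 * ring and let amdgpu_sdma_reset_engine() run the pre_reset callbacks, the
 * actual engine reset and the post_reset callbacks as documented above. */
static int sdma_vX_0_reset_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 instance_id = ring->me;	/* SDMA instance owning this ring */

	return amdgpu_sdma_reset_engine(adev, instance_id);
}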
@@ -50,6 +50,12 @@ enum amdgpu_sdma_irq {

#define NUM_SDMA(x) hweight32(x)

struct amdgpu_sdma_funcs {
int (*stop_kernel_queue)(struct amdgpu_ring *ring);
int (*start_kernel_queue)(struct amdgpu_ring *ring);
int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
};
|
||||
|
||||
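A sketch of how an SDMA IP backend might wire up these new per-instance hooks during sw_init; the sdma_v5_0_* helper names are hypothetical and used purely for illustration, while adev->sdma.instance[i].funcs is the field introduced below.

/* Hypothetical callback table for one SDMA IP version. */
static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
	.stop_kernel_queue = &sdma_v5_0_stop_queue,
	.start_kernel_queue = &sdma_v5_0_restore_queue,
	.soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
};

/* Point every instance at the table so amdgpu_sdma_soft_reset() and
 * amdgpu_sdma_reset_engine() can use the hooks. */
static void sdma_v5_0_set_sdma_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
}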
struct amdgpu_sdma_instance {
|
||||
/* SDMA firmware */
|
||||
const struct firmware *fw;
|
||||
@@ -68,7 +74,7 @@ struct amdgpu_sdma_instance {
|
||||
/* track guilty state of GFX and PAGE queues */
|
||||
bool gfx_guilty;
|
||||
bool page_guilty;
|
||||
|
||||
const struct amdgpu_sdma_funcs *funcs;
|
||||
};
|
||||
|
||||
enum amdgpu_sdma_ras_memory_id {
|
||||
@@ -103,13 +109,6 @@ struct amdgpu_sdma_ras {
struct amdgpu_ras_block_object ras_block;
};

struct sdma_on_reset_funcs {
int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id);
int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id);
/* Linked list node to store this structure in a list; */
struct list_head list;
};
|
||||
|
||||
struct amdgpu_sdma {
|
||||
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
|
||||
struct amdgpu_irq_src trap_irq;
|
||||
@@ -131,6 +130,8 @@ struct amdgpu_sdma {
|
||||
uint32_t *ip_dump;
|
||||
uint32_t supported_reset;
|
||||
struct list_head reset_callback_list;
|
||||
bool no_user_submission;
|
||||
bool disable_uq;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -170,7 +171,6 @@ struct amdgpu_buffer_funcs {
|
||||
uint32_t byte_count);
|
||||
};
|
||||
|
||||
void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
|
||||
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
|
||||
|
||||
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
|
||||
|
||||
@@ -45,7 +45,11 @@
|
||||
*/
|
||||
static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
|
||||
{
|
||||
return AMDGPU_VA_RESERVED_SEQ64_START(adev);
|
||||
u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev);
|
||||
|
||||
addr = amdgpu_gmc_sign_extend(addr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
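The helper now sign-extends the reserved VA so it lands in the canonical upper half of the GPU address space. A rough illustration of what that extension does, assuming a 48-bit virtual address split; the exact hole boundaries come from the GMC helpers, this is not the driver's implementation.

/* Illustrative only: fold a 48-bit address into canonical form by
 * replicating bit 47 into the upper bits, mirroring the effect of
 * amdgpu_gmc_sign_extend() on addresses above the GMC hole. */
static u64 sign_extend_va48(u64 addr)
{
	if (addr & (1ULL << 47))
		addr |= ~((1ULL << 48) - 1);	/* set bits 63..48 */
	return addr;
}

The AMDGPU_GMC_HOLE_MASK applied in amdgpu_seq64_map() below is the inverse step: it strips those upper bits again before the address is handed to amdgpu_vm_bo_map().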
|
||||
/**
|
||||
@@ -63,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
|
||||
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va)
|
||||
{
|
||||
u64 seq64_addr, va_flags;
|
||||
struct amdgpu_bo *bo;
|
||||
struct drm_exec exec;
|
||||
u64 seq64_addr;
|
||||
int r;
|
||||
|
||||
bo = adev->seq64.sbo;
|
||||
@@ -88,9 +92,11 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
goto error;
|
||||
}
|
||||
|
||||
seq64_addr = amdgpu_seq64_get_va_base(adev);
|
||||
seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
|
||||
va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
|
||||
AMDGPU_PTE_READABLE);
|
||||
va_flags);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
|
||||
amdgpu_vm_bo_del(adev, *bo_va);
|
||||
@@ -156,6 +162,7 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
*
* @adev: amdgpu_device pointer
* @va: VA to access the seq in process address space
* @gpu_addr: GPU address to access the seq
* @cpu_addr: CPU address to access the seq
*
* Alloc a 64 bit memory from seq64 pool.
@@ -163,7 +170,8 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
* Returns:
* 0 on success or a negative error code on failure
*/
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr)
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
u64 *gpu_addr, u64 **cpu_addr)
|
||||
{
|
||||
unsigned long bit_pos;
|
||||
|
||||
@@ -172,7 +180,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr)
|
||||
return -ENOSPC;
|
||||
|
||||
__set_bit(bit_pos, adev->seq64.used);
|
||||
|
||||
*va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
|
||||
|
||||
if (gpu_addr)
|
||||
*gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr;
|
||||
|
||||
*cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
|
||||
|
||||
return 0;
|
||||
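With the extra output parameter, a caller gets the process VA, the kernel GPU address and the CPU pointer for one 64-bit slot in a single call. A minimal usage sketch, assuming an adev pointer is in scope:

u64 va, gpu_addr;
u64 *cpu_addr;
int r;

/* Grab one 64-bit slot from the seq64 pool. */
r = amdgpu_seq64_alloc(adev, &va, &gpu_addr, &cpu_addr);
if (r)
	return r;

*cpu_addr = 0;			/* CPU-side init of the sequence value */
/* ... hand 'va' to userspace and 'gpu_addr' to the engine ... */

amdgpu_seq64_free(adev, va);	/* release the slot when done */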
@@ -233,7 +246,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev)
|
||||
*/
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
|
||||
&adev->seq64.sbo, NULL,
|
||||
&adev->seq64.sbo, &adev->seq64.gpu_addr,
|
||||
(void **)&adev->seq64.cpu_base_addr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
|
||||
|
||||
@@ -32,13 +32,14 @@
|
||||
struct amdgpu_seq64 {
|
||||
struct amdgpu_bo *sbo;
|
||||
u32 num_sem;
|
||||
u64 gpu_addr;
|
||||
u64 *cpu_base_addr;
|
||||
DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
|
||||
};
|
||||
|
||||
void amdgpu_seq64_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_seq64_init(struct amdgpu_device *adev);
|
||||
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
|
||||
int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
|
||||
void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
|
||||
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va);
|
||||
|
||||
@@ -249,9 +249,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
|
||||
if (resv == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: Use DMA_RESV_USAGE_READ here */
|
||||
dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
|
||||
/* Implicitly sync only to KERNEL, WRITE and READ */
|
||||
dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
|
||||
dma_fence_chain_for_each(f, f) {
|
||||
struct dma_fence *tmp = dma_fence_chain_contained(f);
|
||||
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
|
||||
#include "amdgpu_socbb.h"
|
||||
|
||||
#define RS64_FW_UC_START_ADDR_LO 0x3000
|
||||
|
||||
struct common_firmware_header {
|
||||
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
|
||||
uint32_t header_size_bytes; /* size of just the header in bytes */
|
||||
|
||||
919
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
Normal file
@@ -0,0 +1,919 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <drm/drm_auth.h>
|
||||
#include <drm/drm_exec.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "amdgpu_userq.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
|
||||
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
u32 userq_ip_mask = 0;
|
||||
|
||||
for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
|
||||
if (adev->userq_funcs[i])
|
||||
userq_ip_mask |= (1 << i);
|
||||
}
|
||||
|
||||
return userq_ip_mask;
|
||||
}
|
||||
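A small sketch of how the returned mask is typically consumed, checking a single IP bit before exposing a capability; the surrounding context is assumed:

u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);

/* Only advertise GFX user queues if a backend registered for that IP. */
bool gfx_userq_supported = ip_mask & (1 << AMDGPU_HW_IP_GFX);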
|
||||
static int
|
||||
amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *userq_funcs =
|
||||
adev->userq_funcs[queue->queue_type];
|
||||
int r = 0;
|
||||
|
||||
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
|
||||
r = userq_funcs->unmap(uq_mgr, queue);
|
||||
if (r)
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
else
|
||||
queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *userq_funcs =
|
||||
adev->userq_funcs[queue->queue_type];
|
||||
int r = 0;
|
||||
|
||||
if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
|
||||
r = userq_funcs->map(uq_mgr, queue);
|
||||
if (r) {
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
} else {
|
||||
queue->state = AMDGPU_USERQ_STATE_MAPPED;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static void
|
||||
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct dma_fence *f = queue->last_fence;
|
||||
int ret;
|
||||
|
||||
if (f && !dma_fence_is_signaled(f)) {
|
||||
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
|
||||
if (ret <= 0)
|
||||
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
|
||||
f->context, f->seqno);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue,
|
||||
int queue_id)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
|
||||
|
||||
uq_funcs->mqd_destroy(uq_mgr, queue);
|
||||
amdgpu_userq_fence_driver_free(queue);
|
||||
idr_remove(&uq_mgr->userq_idr, queue_id);
|
||||
kfree(queue);
|
||||
}
|
||||
|
||||
int
|
||||
amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int queue_id;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
/* Resume all the queues for this process */
|
||||
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
|
||||
ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
|
||||
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
return ret;
|
||||
}
|
||||
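amdgpu_userq_active() simply counts the queues of one process that are currently in the MAPPED state. A hedged example of using it as a busy check; the -EBUSY policy here is illustrative, not taken from the driver:

/* Skip an idle/suspend optimization while this process still has
 * mapped user queues. */
if (amdgpu_userq_active(&fpriv->userq_mgr))
	return -EBUSY;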
|
||||
static struct amdgpu_usermode_queue *
|
||||
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
|
||||
{
|
||||
return idr_find(&uq_mgr->userq_idr, qid);
|
||||
}
|
||||
|
||||
void
|
||||
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
retry:
|
||||
/* Flush any pending resume work to create ev_fence */
|
||||
flush_delayed_work(&uq_mgr->resume_work);
|
||||
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
ev_fence = evf_mgr->ev_fence;
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
/*
|
||||
* Looks like there was no pending resume work,
|
||||
* add one now to create a valid eviction fence
|
||||
*/
|
||||
schedule_delayed_work(&uq_mgr->resume_work, 0);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_userq_obj *userq_obj,
|
||||
int size)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_bo_param bp;
|
||||
int r;
|
||||
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
bp.byte_align = PAGE_SIZE;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
||||
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
bp.type = ttm_bo_type_kernel;
|
||||
bp.size = size;
|
||||
bp.resv = NULL;
|
||||
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
|
||||
|
||||
r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_reserve(userq_obj->obj, true);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
|
||||
goto free_obj;
|
||||
}
|
||||
|
||||
r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
|
||||
goto unresv;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
|
||||
goto unresv;
|
||||
}
|
||||
|
||||
userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
|
||||
amdgpu_bo_unreserve(userq_obj->obj);
|
||||
memset(userq_obj->cpu_ptr, 0, size);
|
||||
return 0;
|
||||
|
||||
unresv:
|
||||
amdgpu_bo_unreserve(userq_obj->obj);
|
||||
|
||||
free_obj:
|
||||
amdgpu_bo_unref(&userq_obj->obj);
|
||||
return r;
|
||||
}
|
||||
|
||||
void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_userq_obj *userq_obj)
|
||||
{
|
||||
amdgpu_bo_kunmap(userq_obj->obj);
|
||||
amdgpu_bo_unref(&userq_obj->obj);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_db_info *db_info,
|
||||
struct drm_file *filp)
|
||||
{
|
||||
uint64_t index;
|
||||
struct drm_gem_object *gobj;
|
||||
struct amdgpu_userq_obj *db_obj = db_info->db_obj;
|
||||
int r, db_size;
|
||||
|
||||
gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
|
||||
if (gobj == NULL) {
|
||||
drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
|
||||
drm_gem_object_put(gobj);
|
||||
|
||||
/* Pin the BO before generating the index, unpin in queue destroy */
|
||||
r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
|
||||
goto unref_bo;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_reserve(db_obj->obj, true);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
|
||||
goto unpin_bo;
|
||||
}
|
||||
|
||||
switch (db_info->queue_type) {
|
||||
case AMDGPU_HW_IP_GFX:
|
||||
case AMDGPU_HW_IP_COMPUTE:
|
||||
case AMDGPU_HW_IP_DMA:
|
||||
db_size = sizeof(u64);
|
||||
break;
|
||||
|
||||
case AMDGPU_HW_IP_VCN_ENC:
|
||||
db_size = sizeof(u32);
|
||||
db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
|
||||
break;
|
||||
|
||||
case AMDGPU_HW_IP_VPE:
|
||||
db_size = sizeof(u32);
|
||||
db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
|
||||
db_info->queue_type);
|
||||
r = -EINVAL;
|
||||
goto unpin_bo;
|
||||
}
|
||||
|
||||
index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
|
||||
db_info->doorbell_offset, db_size);
|
||||
drm_dbg_driver(adev_to_drm(uq_mgr->adev),
|
||||
"[Usermode queues] doorbell index=%lld\n", index);
|
||||
amdgpu_bo_unreserve(db_obj->obj);
|
||||
return index;
|
||||
|
||||
unpin_bo:
|
||||
amdgpu_bo_unpin(db_obj->obj);
|
||||
|
||||
unref_bo:
|
||||
amdgpu_bo_unref(&db_obj->obj);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int r = 0;
|
||||
|
||||
cancel_delayed_work(&uq_mgr->resume_work);
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
|
||||
queue = amdgpu_userq_find(uq_mgr, queue_id);
|
||||
if (!queue) {
|
||||
drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
|
||||
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
|
||||
amdgpu_bo_unpin(queue->db_obj.obj);
|
||||
amdgpu_bo_unref(&queue->db_obj.obj);
|
||||
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
|
||||
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_userq_priority_permit(struct drm_file *filp,
|
||||
int priority)
|
||||
{
|
||||
if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
|
||||
return 0;
|
||||
|
||||
if (capable(CAP_SYS_NICE))
|
||||
return 0;
|
||||
|
||||
if (drm_is_current_master(filp))
|
||||
return 0;
|
||||
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *uq_funcs;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_db_info db_info;
|
||||
bool skip_map_queue;
|
||||
uint64_t index;
|
||||
int qid, r = 0;
|
||||
int priority =
|
||||
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
|
||||
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
|
||||
|
||||
/* Usermode queues are only supported for GFX IP as of now */
|
||||
if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
|
||||
args->in.ip_type != AMDGPU_HW_IP_DMA &&
|
||||
args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
|
||||
drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n",
|
||||
args->in.ip_type);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = amdgpu_userq_priority_permit(filp, priority);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
|
||||
(args->in.ip_type != AMDGPU_HW_IP_GFX) &&
|
||||
(args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
|
||||
!amdgpu_is_tmz(adev)) {
|
||||
drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
|
||||
if (r < 0) {
|
||||
drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* There could be a situation that we are creating a new queue while
|
||||
* the other queues under this UQ_mgr are suspended. So if there is any
|
||||
* resume work pending, wait for it to get done.
|
||||
*
|
||||
* This will also make sure we have a valid eviction fence ready to be used.
|
||||
*/
|
||||
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
|
||||
|
||||
uq_funcs = adev->userq_funcs[args->in.ip_type];
|
||||
if (!uq_funcs) {
|
||||
drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
|
||||
args->in.ip_type);
|
||||
r = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
|
||||
if (!queue) {
|
||||
drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
|
||||
r = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
queue->doorbell_handle = args->in.doorbell_handle;
|
||||
queue->queue_type = args->in.ip_type;
|
||||
queue->vm = &fpriv->vm;
|
||||
queue->priority = priority;
|
||||
|
||||
db_info.queue_type = queue->queue_type;
|
||||
db_info.doorbell_handle = queue->doorbell_handle;
|
||||
db_info.db_obj = &queue->db_obj;
|
||||
db_info.doorbell_offset = args->in.doorbell_offset;
|
||||
|
||||
/* Convert relative doorbell offset into absolute doorbell index */
|
||||
index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
|
||||
if (index == (uint64_t)-EINVAL) {
|
||||
drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
|
||||
kfree(queue);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
queue->doorbell_index = index;
|
||||
xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
|
||||
r = amdgpu_userq_fence_driver_alloc(adev, queue);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
|
||||
if (r) {
|
||||
drm_file_err(uq_mgr->file, "Failed to create Queue\n");
|
||||
amdgpu_userq_fence_driver_free(queue);
|
||||
kfree(queue);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
|
||||
qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
|
||||
if (qid < 0) {
|
||||
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
|
||||
amdgpu_userq_fence_driver_free(queue);
|
||||
uq_funcs->mqd_destroy(uq_mgr, queue);
|
||||
kfree(queue);
|
||||
r = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* don't map the queue if scheduling is halted */
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
if (adev->userq_halt_for_enforce_isolation &&
|
||||
((queue->queue_type == AMDGPU_HW_IP_GFX) ||
|
||||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
|
||||
skip_map_queue = true;
|
||||
else
|
||||
skip_map_queue = false;
|
||||
if (!skip_map_queue) {
|
||||
r = amdgpu_userq_map_helper(uq_mgr, queue);
|
||||
if (r) {
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
|
||||
idr_remove(&uq_mgr->userq_idr, qid);
|
||||
amdgpu_userq_fence_driver_free(queue);
|
||||
uq_funcs->mqd_destroy(uq_mgr, queue);
|
||||
kfree(queue);
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
|
||||
|
||||
args->out.queue_id = qid;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp)
|
||||
{
|
||||
union drm_amdgpu_userq *args = data;
|
||||
int r;
|
||||
|
||||
switch (args->in.op) {
|
||||
case AMDGPU_USERQ_OP_CREATE:
|
||||
if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
|
||||
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
|
||||
return -EINVAL;
|
||||
r = amdgpu_userq_create(filp, args);
|
||||
if (r)
|
||||
drm_file_err(filp, "Failed to create usermode queue\n");
|
||||
break;
|
||||
|
||||
case AMDGPU_USERQ_OP_FREE:
|
||||
if (args->in.ip_type ||
|
||||
args->in.doorbell_handle ||
|
||||
args->in.doorbell_offset ||
|
||||
args->in.flags ||
|
||||
args->in.queue_va ||
|
||||
args->in.queue_size ||
|
||||
args->in.rptr_va ||
|
||||
args->in.wptr_va ||
|
||||
args->in.wptr_va ||
|
||||
args->in.mqd ||
|
||||
args->in.mqd_size)
|
||||
return -EINVAL;
|
||||
r = amdgpu_userq_destroy(filp, args->in.queue_id);
|
||||
if (r)
|
||||
drm_file_err(filp, "Failed to destroy usermode queue\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
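For context, a rough userspace-side sketch of driving this ioctl to create a queue and obtain the id that is later passed back with AMDGPU_USERQ_OP_FREE. The DRM_IOCTL_AMDGPU_USERQ request macro is an assumption here; the authoritative definitions live in the amdgpu_drm.h uAPI header, and the queue_va/MQD fields must be filled in by the userspace driver.

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
/* Assumed uAPI names; see amdgpu_drm.h for the real definitions. */

static int create_gfx_userq(int drm_fd, union drm_amdgpu_userq *req)
{
	memset(req, 0, sizeof(*req));
	req->in.op = AMDGPU_USERQ_OP_CREATE;
	req->in.ip_type = AMDGPU_HW_IP_GFX;
	/* doorbell_handle/doorbell_offset, queue_va/queue_size,
	 * rptr_va/wptr_va and the MQD blob are filled in by the
	 * userspace driver before this call. */
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ, req))
		return -errno;

	return req->out.queue_id;	/* used later with AMDGPU_USERQ_OP_FREE */
}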
|
||||
static int
|
||||
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
/* Resume all the queues for this process */
|
||||
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
|
||||
r = amdgpu_userq_map_helper(uq_mgr, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
|
||||
{
|
||||
struct ttm_operation_ctx ctx = { false, false };
|
||||
int ret;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
|
||||
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (ret)
|
||||
DRM_ERROR("Fail to validate\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
|
||||
struct amdgpu_vm *vm = &fpriv->vm;
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_bo_va *bo_va;
|
||||
struct ww_acquire_ctx *ticket;
|
||||
struct drm_exec exec;
|
||||
struct amdgpu_bo *bo;
|
||||
struct dma_resv *resv;
|
||||
bool clear, unlock;
|
||||
int ret = 0;
|
||||
|
||||
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
|
||||
drm_exec_until_all_locked(&exec) {
|
||||
ret = amdgpu_vm_lock_pd(vm, &exec, 2);
|
||||
drm_exec_retry_on_contention(&exec);
|
||||
if (unlikely(ret)) {
|
||||
drm_file_err(uq_mgr->file, "Failed to lock PD\n");
|
||||
goto unlock_all;
|
||||
}
|
||||
|
||||
/* Lock the done list */
|
||||
list_for_each_entry(bo_va, &vm->done, base.vm_status) {
|
||||
bo = bo_va->base.bo;
|
||||
if (!bo)
|
||||
continue;
|
||||
|
||||
ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
|
||||
drm_exec_retry_on_contention(&exec);
|
||||
if (unlikely(ret))
|
||||
goto unlock_all;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&vm->status_lock);
|
||||
while (!list_empty(&vm->moved)) {
|
||||
bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
|
||||
base.vm_status);
|
||||
spin_unlock(&vm->status_lock);
|
||||
|
||||
/* Per VM BOs never need to be cleared in the page tables */
|
||||
ret = amdgpu_vm_bo_update(adev, bo_va, false);
|
||||
if (ret)
|
||||
goto unlock_all;
|
||||
spin_lock(&vm->status_lock);
|
||||
}
|
||||
|
||||
ticket = &exec.ticket;
|
||||
while (!list_empty(&vm->invalidated)) {
|
||||
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
|
||||
base.vm_status);
|
||||
resv = bo_va->base.bo->tbo.base.resv;
|
||||
spin_unlock(&vm->status_lock);
|
||||
|
||||
bo = bo_va->base.bo;
|
||||
ret = amdgpu_userq_validate_vm_bo(NULL, bo);
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file, "Failed to validate BO\n");
|
||||
goto unlock_all;
|
||||
}
|
||||
|
||||
/* Try to reserve the BO to avoid clearing its ptes */
|
||||
if (!adev->debug_vm && dma_resv_trylock(resv)) {
|
||||
clear = false;
|
||||
unlock = true;
|
||||
/* The caller is already holding the reservation lock */
|
||||
} else if (dma_resv_locking_ctx(resv) == ticket) {
|
||||
clear = false;
|
||||
unlock = false;
|
||||
/* Somebody else is using the BO right now */
|
||||
} else {
|
||||
clear = true;
|
||||
unlock = false;
|
||||
}
|
||||
|
||||
ret = amdgpu_vm_bo_update(adev, bo_va, clear);
|
||||
|
||||
if (unlock)
|
||||
dma_resv_unlock(resv);
|
||||
if (ret)
|
||||
goto unlock_all;
|
||||
|
||||
spin_lock(&vm->status_lock);
|
||||
}
|
||||
spin_unlock(&vm->status_lock);
|
||||
|
||||
ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
|
||||
if (ret)
|
||||
drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
|
||||
|
||||
unlock_all:
|
||||
drm_exec_fini(&exec);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_restore_worker(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
|
||||
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
|
||||
int ret;
|
||||
|
||||
flush_work(&fpriv->evf_mgr.suspend_work.work);
|
||||
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
|
||||
ret = amdgpu_userq_validate_bos(uq_mgr);
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = amdgpu_userq_restore_all(uq_mgr);
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
/* Try to unmap all the queues in this process ctx */
|
||||
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
|
||||
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int queue_id, ret;
|
||||
|
||||
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
|
||||
struct dma_fence *f = queue->last_fence;
|
||||
|
||||
if (!f || dma_fence_is_signaled(f))
|
||||
continue;
|
||||
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
|
||||
if (ret <= 0) {
|
||||
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
|
||||
f->context, f->seqno);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_eviction_fence *ev_fence)
|
||||
{
|
||||
int ret;
|
||||
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
|
||||
|
||||
/* Wait for any pending userqueue fence work to finish */
|
||||
ret = amdgpu_userq_wait_for_signal(uq_mgr);
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = amdgpu_userq_evict_all(uq_mgr);
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal current eviction fence */
|
||||
amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
|
||||
|
||||
if (evf_mgr->fd_closing) {
|
||||
cancel_delayed_work(&uq_mgr->resume_work);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Schedule a resume work */
|
||||
schedule_delayed_work(&uq_mgr->resume_work, 0);
|
||||
}
|
||||
|
||||
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
|
||||
struct amdgpu_device *adev)
|
||||
{
|
||||
mutex_init(&userq_mgr->userq_mutex);
|
||||
idr_init_base(&userq_mgr->userq_idr, 1);
|
||||
userq_mgr->adev = adev;
|
||||
userq_mgr->file = file_priv;
|
||||
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
list_add(&userq_mgr->list, &adev->userq_mgr_list);
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
|
||||
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
|
||||
{
|
||||
struct amdgpu_device *adev = userq_mgr->adev;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
uint32_t queue_id;
|
||||
|
||||
cancel_delayed_work(&userq_mgr->resume_work);
|
||||
|
||||
mutex_lock(&userq_mgr->userq_mutex);
|
||||
idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
|
||||
amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
|
||||
amdgpu_userq_unmap_helper(userq_mgr, queue);
|
||||
amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
|
||||
}
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
if (uqm == userq_mgr) {
|
||||
list_del(&uqm->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
idr_destroy(&userq_mgr->userq_idr);
|
||||
mutex_unlock(&userq_mgr->userq_mutex);
|
||||
mutex_destroy(&userq_mgr->userq_mutex);
|
||||
}
|
||||
|
||||
int amdgpu_userq_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
if (!ip_mask)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
cancel_delayed_work_sync(&uqm->resume_work);
|
||||
mutex_lock(&uqm->userq_mutex);
|
||||
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
|
||||
r = amdgpu_userq_unmap_helper(uqm, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
mutex_unlock(&uqm->userq_mutex);
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_userq_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
if (!ip_mask)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
mutex_lock(&uqm->userq_mutex);
|
||||
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
|
||||
r = amdgpu_userq_map_helper(uqm, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
mutex_unlock(&uqm->userq_mutex);
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
|
||||
u32 idx)
|
||||
{
|
||||
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
/* only need to stop gfx/compute */
|
||||
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
if (adev->userq_halt_for_enforce_isolation)
|
||||
dev_warn(adev->dev, "userq scheduling already stopped!\n");
|
||||
adev->userq_halt_for_enforce_isolation = true;
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
cancel_delayed_work_sync(&uqm->resume_work);
|
||||
mutex_lock(&uqm->userq_mutex);
|
||||
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
|
||||
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
|
||||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
|
||||
(queue->xcp_id == idx)) {
|
||||
r = amdgpu_userq_unmap_helper(uqm, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&uqm->userq_mutex);
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
|
||||
u32 idx)
|
||||
{
|
||||
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
int queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
/* only need to stop gfx/compute */
|
||||
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&adev->userq_mutex);
|
||||
if (!adev->userq_halt_for_enforce_isolation)
|
||||
dev_warn(adev->dev, "userq scheduling already started!\n");
|
||||
adev->userq_halt_for_enforce_isolation = false;
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
mutex_lock(&uqm->userq_mutex);
|
||||
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
|
||||
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
|
||||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
|
||||
(queue->xcp_id == idx)) {
|
||||
r = amdgpu_userq_map_helper(uqm, queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&uqm->userq_mutex);
|
||||
}
|
||||
mutex_unlock(&adev->userq_mutex);
|
||||
return ret;
|
||||
}
|
||||
135
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
Normal file
@@ -0,0 +1,135 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AMDGPU_USERQ_H_
|
||||
#define AMDGPU_USERQ_H_
|
||||
#include "amdgpu_eviction_fence.h"
|
||||
|
||||
#define AMDGPU_MAX_USERQ_COUNT 512
|
||||
|
||||
#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
|
||||
#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
|
||||
#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
|
||||
|
||||
enum amdgpu_userq_state {
|
||||
AMDGPU_USERQ_STATE_UNMAPPED = 0,
|
||||
AMDGPU_USERQ_STATE_MAPPED,
|
||||
AMDGPU_USERQ_STATE_PREEMPTED,
|
||||
AMDGPU_USERQ_STATE_HUNG,
|
||||
};
|
||||
|
||||
struct amdgpu_mqd_prop;
|
||||
|
||||
struct amdgpu_userq_obj {
|
||||
void *cpu_ptr;
|
||||
uint64_t gpu_addr;
|
||||
struct amdgpu_bo *obj;
|
||||
};
|
||||
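amdgpu_userq_obj bundles the BO, its CPU mapping and its GPU address; amdgpu_userq_create_object()/amdgpu_userq_destroy_object(), declared further down in this header, manage its lifetime. A short sketch of a backend allocating a per-queue buffer this way, assuming a uq_mgr pointer is in scope:

struct amdgpu_userq_obj ctx_obj;
int r;

/* One page of GTT, CPU-mapped and zeroed by the helper. */
r = amdgpu_userq_create_object(uq_mgr, &ctx_obj, PAGE_SIZE);
if (r)
	return r;

/* ctx_obj.cpu_ptr and ctx_obj.gpu_addr are now valid ... */

amdgpu_userq_destroy_object(uq_mgr, &ctx_obj);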
|
||||
struct amdgpu_usermode_queue {
|
||||
int queue_type;
|
||||
enum amdgpu_userq_state state;
|
||||
uint64_t doorbell_handle;
|
||||
uint64_t doorbell_index;
|
||||
uint64_t flags;
|
||||
struct amdgpu_mqd_prop *userq_prop;
|
||||
struct amdgpu_userq_mgr *userq_mgr;
|
||||
struct amdgpu_vm *vm;
|
||||
struct amdgpu_userq_obj mqd;
|
||||
struct amdgpu_userq_obj db_obj;
|
||||
struct amdgpu_userq_obj fw_obj;
|
||||
struct amdgpu_userq_obj wptr_obj;
|
||||
struct xarray fence_drv_xa;
|
||||
struct amdgpu_userq_fence_driver *fence_drv;
|
||||
struct dma_fence *last_fence;
|
||||
u32 xcp_id;
|
||||
int priority;
|
||||
};
|
||||
|
||||
struct amdgpu_userq_funcs {
|
||||
int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct drm_amdgpu_userq_in *args,
|
||||
struct amdgpu_usermode_queue *queue);
|
||||
void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *uq);
|
||||
int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue);
|
||||
int (*map)(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue);
|
||||
};
|
||||
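Each HW IP that supports user queues provides one amdgpu_userq_funcs table and hooks it into adev->userq_funcs[], which is what amdgpu_userq_get_supported_ip_mask() and the create path consult. A sketch with hypothetical mes_userq_* callbacks standing in for a real backend:

/* Hypothetical backend implementation for a MES-managed GFX queue. */
static const struct amdgpu_userq_funcs userq_mes_funcs = {
	.mqd_create  = mes_userq_mqd_create,
	.mqd_destroy = mes_userq_mqd_destroy,
	.map         = mes_userq_map,
	.unmap       = mes_userq_unmap,
};

/* Registered during IP init so the IOCTL path can find it. */
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;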
|
||||
/* Usermode queues for gfx */
|
||||
struct amdgpu_userq_mgr {
|
||||
struct idr userq_idr;
|
||||
struct mutex userq_mutex;
|
||||
struct amdgpu_device *adev;
|
||||
struct delayed_work resume_work;
|
||||
struct list_head list;
|
||||
struct drm_file *file;
|
||||
};
|
||||
|
||||
struct amdgpu_db_info {
|
||||
uint64_t doorbell_handle;
|
||||
uint32_t queue_type;
|
||||
uint32_t doorbell_offset;
|
||||
struct amdgpu_userq_obj *db_obj;
|
||||
};
|
||||
|
||||
int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
||||
|
||||
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
|
||||
struct amdgpu_device *adev);
|
||||
|
||||
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
|
||||
|
||||
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_userq_obj *userq_obj,
|
||||
int size);
|
||||
|
||||
void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_userq_obj *userq_obj);
|
||||
|
||||
void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_eviction_fence *ev_fence);
|
||||
|
||||
int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr);
|
||||
|
||||
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr);
|
||||
|
||||
uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_db_info *db_info,
|
||||
struct drm_file *filp);
|
||||
|
||||
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_userq_suspend(struct amdgpu_device *adev);
|
||||
int amdgpu_userq_resume(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
|
||||
u32 idx);
|
||||
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
|
||||
u32 idx);
|
||||
|
||||
#endif
|
||||
966
drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
Normal file
@@ -0,0 +1,966 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kref.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dma-fence-unwrap.h>
|
||||
|
||||
#include <drm/drm_exec.h>
|
||||
#include <drm/drm_syncobj.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
|
||||
static const struct dma_fence_ops amdgpu_userq_fence_ops;
|
||||
static struct kmem_cache *amdgpu_userq_fence_slab;
|
||||
|
||||
int amdgpu_userq_fence_slab_init(void)
|
||||
{
|
||||
amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
|
||||
sizeof(struct amdgpu_userq_fence),
|
||||
0,
|
||||
SLAB_HWCACHE_ALIGN,
|
||||
NULL);
|
||||
if (!amdgpu_userq_fence_slab)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_userq_fence_slab_fini(void)
|
||||
{
|
||||
rcu_barrier();
|
||||
kmem_cache_destroy(amdgpu_userq_fence_slab);
|
||||
}
|
||||
|
||||
static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
|
||||
{
|
||||
if (!f || f->ops != &amdgpu_userq_fence_ops)
|
||||
return NULL;
|
||||
|
||||
return container_of(f, struct amdgpu_userq_fence, base);
|
||||
}
|
||||
|
||||
static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
|
||||
{
|
||||
return le64_to_cpu(*fence_drv->cpu_addr);
|
||||
}
|
||||
|
||||
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
|
||||
struct amdgpu_usermode_queue *userq)
|
||||
{
|
||||
struct amdgpu_userq_fence_driver *fence_drv;
|
||||
unsigned long flags;
|
||||
int r;
|
||||
|
||||
fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
|
||||
if (!fence_drv)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Acquire seq64 memory */
|
||||
r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
|
||||
&fence_drv->cpu_addr);
|
||||
if (r)
|
||||
goto free_fence_drv;
|
||||
|
||||
memset(fence_drv->cpu_addr, 0, sizeof(u64));
|
||||
|
||||
kref_init(&fence_drv->refcount);
|
||||
INIT_LIST_HEAD(&fence_drv->fences);
|
||||
spin_lock_init(&fence_drv->fence_list_lock);
|
||||
|
||||
fence_drv->adev = adev;
|
||||
fence_drv->context = dma_fence_context_alloc(1);
|
||||
get_task_comm(fence_drv->timeline_name, current);
|
||||
|
||||
xa_lock_irqsave(&adev->userq_xa, flags);
|
||||
r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
|
||||
fence_drv, GFP_KERNEL));
|
||||
xa_unlock_irqrestore(&adev->userq_xa, flags);
|
||||
if (r)
|
||||
goto free_seq64;
|
||||
|
||||
userq->fence_drv = fence_drv;
|
||||
|
||||
return 0;
|
||||
|
||||
free_seq64:
|
||||
amdgpu_seq64_free(adev, fence_drv->va);
|
||||
free_fence_drv:
|
||||
kfree(fence_drv);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
|
||||
{
|
||||
struct amdgpu_userq_fence_driver *fence_drv;
|
||||
unsigned long index;
|
||||
|
||||
if (xa_empty(xa))
|
||||
return;
|
||||
|
||||
xa_lock(xa);
|
||||
xa_for_each(xa, index, fence_drv) {
|
||||
__xa_erase(xa, index);
|
||||
amdgpu_userq_fence_driver_put(fence_drv);
|
||||
}
|
||||
|
||||
xa_unlock(xa);
|
||||
}
|
||||
|
||||
void
|
||||
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
|
||||
{
|
||||
amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
|
||||
xa_destroy(&userq->fence_drv_xa);
|
||||
/* Drop the fence_drv reference held by user queue */
|
||||
amdgpu_userq_fence_driver_put(userq->fence_drv);
|
||||
}
|
||||
|
||||
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
|
||||
{
|
||||
struct amdgpu_userq_fence *userq_fence, *tmp;
|
||||
struct dma_fence *fence;
|
||||
u64 rptr;
|
||||
int i;
|
||||
|
||||
if (!fence_drv)
|
||||
return;
|
||||
|
||||
rptr = amdgpu_userq_fence_read(fence_drv);
|
||||
|
||||
spin_lock(&fence_drv->fence_list_lock);
|
||||
list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
|
||||
fence = &userq_fence->base;
|
||||
|
||||
if (rptr < fence->seqno)
|
||||
break;
|
||||
|
||||
dma_fence_signal(fence);
|
||||
|
||||
for (i = 0; i < userq_fence->fence_drv_array_count; i++)
|
||||
amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
|
||||
|
||||
list_del(&userq_fence->link);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
spin_unlock(&fence_drv->fence_list_lock);
|
||||
}
|
||||
|
||||
void amdgpu_userq_fence_driver_destroy(struct kref *ref)
|
||||
{
|
||||
struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
|
||||
struct amdgpu_userq_fence_driver,
|
||||
refcount);
|
||||
struct amdgpu_userq_fence_driver *xa_fence_drv;
|
||||
struct amdgpu_device *adev = fence_drv->adev;
|
||||
struct amdgpu_userq_fence *fence, *tmp;
|
||||
struct xarray *xa = &adev->userq_xa;
|
||||
unsigned long index, flags;
|
||||
struct dma_fence *f;
|
||||
|
||||
spin_lock(&fence_drv->fence_list_lock);
|
||||
list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
|
||||
f = &fence->base;
|
||||
|
||||
if (!dma_fence_is_signaled(f)) {
|
||||
dma_fence_set_error(f, -ECANCELED);
|
||||
dma_fence_signal(f);
|
||||
}
|
||||
|
||||
list_del(&fence->link);
|
||||
dma_fence_put(f);
|
||||
}
|
||||
spin_unlock(&fence_drv->fence_list_lock);
|
||||
|
||||
xa_lock_irqsave(xa, flags);
|
||||
xa_for_each(xa, index, xa_fence_drv)
|
||||
if (xa_fence_drv == fence_drv)
|
||||
__xa_erase(xa, index);
|
||||
xa_unlock_irqrestore(xa, flags);
|
||||
|
||||
/* Free seq64 memory */
|
||||
amdgpu_seq64_free(adev, fence_drv->va);
|
||||
kfree(fence_drv);
|
||||
}
|
||||
|
||||
void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
|
||||
{
|
||||
kref_get(&fence_drv->refcount);
|
||||
}
|
||||
|
||||
void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
|
||||
{
|
||||
kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
|
||||
}
|
||||
|
||||
static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
|
||||
{
|
||||
*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
|
||||
return *userq_fence ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
|
||||
struct amdgpu_userq_fence *userq_fence,
|
||||
u64 seq, struct dma_fence **f)
|
||||
{
|
||||
struct amdgpu_userq_fence_driver *fence_drv;
|
||||
struct dma_fence *fence;
|
||||
unsigned long flags;
|
||||
|
||||
fence_drv = userq->fence_drv;
|
||||
if (!fence_drv)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_init(&userq_fence->lock);
|
||||
INIT_LIST_HEAD(&userq_fence->link);
|
||||
fence = &userq_fence->base;
|
||||
userq_fence->fence_drv = fence_drv;
|
||||
|
||||
dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
|
||||
fence_drv->context, seq);
|
||||
|
||||
amdgpu_userq_fence_driver_get(fence_drv);
|
||||
dma_fence_get(fence);
|
||||
|
||||
if (!xa_empty(&userq->fence_drv_xa)) {
|
||||
struct amdgpu_userq_fence_driver *stored_fence_drv;
|
||||
unsigned long index, count = 0;
|
||||
int i = 0;
|
||||
|
||||
xa_lock(&userq->fence_drv_xa);
|
||||
xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
|
||||
count++;
|
||||
|
||||
userq_fence->fence_drv_array =
|
||||
kvmalloc_array(count,
|
||||
sizeof(struct amdgpu_userq_fence_driver *),
|
||||
GFP_ATOMIC);
|
||||
|
||||
if (userq_fence->fence_drv_array) {
|
||||
xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
|
||||
userq_fence->fence_drv_array[i] = stored_fence_drv;
|
||||
__xa_erase(&userq->fence_drv_xa, index);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
userq_fence->fence_drv_array_count = i;
|
||||
xa_unlock(&userq->fence_drv_xa);
|
||||
} else {
|
||||
userq_fence->fence_drv_array = NULL;
|
||||
userq_fence->fence_drv_array_count = 0;
|
||||
}
|
||||
|
||||
/* Check if hardware has already processed the job */
|
||||
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
|
||||
if (!dma_fence_is_signaled_locked(fence))
|
||||
list_add_tail(&userq_fence->link, &fence_drv->fences);
|
||||
else
|
||||
dma_fence_put(fence);
|
||||
|
||||
spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
|
||||
|
||||
*f = fence;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
|
||||
{
|
||||
return "amdgpu_userq_fence";
|
||||
}
|
||||
|
||||
static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
|
||||
|
||||
return fence->fence_drv->timeline_name;
|
||||
}
|
||||
|
||||
static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
|
||||
struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
|
||||
u64 rptr, wptr;
|
||||
|
||||
rptr = amdgpu_userq_fence_read(fence_drv);
|
||||
wptr = fence->base.seqno;
|
||||
|
||||
if (rptr >= wptr)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_fence_free(struct rcu_head *rcu)
|
||||
{
|
||||
struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
|
||||
struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
|
||||
struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
|
||||
|
||||
/* Release the fence driver reference */
|
||||
amdgpu_userq_fence_driver_put(fence_drv);
|
||||
|
||||
kvfree(userq_fence->fence_drv_array);
|
||||
kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
|
||||
}
|
||||
|
||||
static void amdgpu_userq_fence_release(struct dma_fence *f)
|
||||
{
|
||||
call_rcu(&f->rcu, amdgpu_userq_fence_free);
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops amdgpu_userq_fence_ops = {
|
||||
.use_64bit_seqno = true,
|
||||
.get_driver_name = amdgpu_userq_fence_get_driver_name,
|
||||
.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
|
||||
.signaled = amdgpu_userq_fence_signaled,
|
||||
.release = amdgpu_userq_fence_release,
|
||||
};
|
||||
|
||||
/**
* amdgpu_userq_fence_read_wptr - Read the userq wptr value
*
* @queue: user mode queue structure pointer
* @wptr: write pointer value
*
* Read the wptr value from userq's MQD. The userq signal IOCTL
* creates a dma_fence for the shared buffers that expects the
* RPTR value written to seq64 memory >= WPTR.
*
* Returns wptr value on success, error on failure.
*/
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
u64 *wptr)
|
||||
{
|
||||
struct amdgpu_bo_va_mapping *mapping;
|
||||
struct amdgpu_bo *bo;
|
||||
u64 addr, *ptr;
|
||||
int r;
|
||||
|
||||
r = amdgpu_bo_reserve(queue->vm->root.bo, false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
addr = queue->userq_prop->wptr_gpu_addr;
|
||||
addr &= AMDGPU_GMC_HOLE_MASK;
|
||||
|
||||
mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
|
||||
if (!mapping) {
|
||||
amdgpu_bo_unreserve(queue->vm->root.bo);
|
||||
DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
|
||||
amdgpu_bo_unreserve(queue->vm->root.bo);
|
||||
r = amdgpu_bo_reserve(bo, true);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to reserve userqueue wptr bo");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_kmap(bo, (void **)&ptr);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed mapping the userqueue wptr bo");
|
||||
goto map_error;
|
||||
}
|
||||
|
||||
*wptr = le64_to_cpu(*ptr);
|
||||
|
||||
amdgpu_bo_kunmap(bo);
|
||||
amdgpu_bo_unreserve(bo);
|
||||
amdgpu_bo_unref(&bo);
|
||||
|
||||
return 0;
|
||||
|
||||
map_error:
|
||||
amdgpu_bo_unreserve(bo);
|
||||
amdgpu_bo_unref(&bo);
|
||||
|
||||
return r;
|
||||
}
|
||||
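The kernel-doc above captures the signalling model: the fence seqno is the queue's wptr at submission time, and the engine writes the processed rptr into the 64-bit seq64 slot. A small worked illustration of the check that amdgpu_userq_fence_signaled() performs, with 128 as an assumed example seqno:

/* seq64 slot written by the engine, read via le64_to_cpu() just like
 * amdgpu_userq_fence_read() does above. */
u64 rptr = le64_to_cpu(*fence_drv->cpu_addr);
u64 wptr = 128;			/* fence->base.seqno for this submission */

/* Pending while rptr < 128; signaled once the engine reports >= 128. */
bool signaled = (rptr >= wptr);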
|
||||
static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
|
||||
{
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      sizeof(u32) * num_syncobj_handles);
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/* We are here means UQ is active, make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Add the created fence to syncobj/BO's */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* drop the reference acquired in fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}

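As a rough sketch of the userspace side of this path (illustrative only; the ioctl macro name and uapi header are assumptions, while the field names mirror the args-> members dereferenced above), a signal request names the queue plus the syncobjs and read/write BOs that should carry the new fence:

/* Hypothetical userspace usage of the signal ioctl. */
#include <stdint.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"		/* assumed to carry the userq uapi */

static int example_userq_signal(int drm_fd, uint32_t queue_id,
				uint32_t syncobj, uint32_t write_bo)
{
	struct drm_amdgpu_userq_signal args = {
		.queue_id = queue_id,
		.syncobj_handles = (uintptr_t)&syncobj,
		.num_syncobj_handles = 1,
		.bo_write_handles = (uintptr_t)&write_bo,
		.num_bo_write_handles = 1,
		/* no read BOs in this example */
	};

	/* Ioctl name assumed; it dispatches to amdgpu_userq_signal_ioctl(). */
	return ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_SIGNAL, &args);
}
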
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp)
|
||||
{
|
||||
u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
|
||||
u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
|
||||
struct drm_amdgpu_userq_fence_info *fence_info = NULL;
|
||||
struct drm_amdgpu_userq_wait *wait_info = data;
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
|
||||
struct amdgpu_usermode_queue *waitq;
|
||||
struct drm_gem_object **gobj_write;
|
||||
struct drm_gem_object **gobj_read;
|
||||
struct dma_fence **fences = NULL;
|
||||
u16 num_points, num_fences = 0;
|
||||
int r, i, rentry, wentry, cnt;
|
||||
struct drm_exec exec;
|
||||
|
||||
num_read_bo_handles = wait_info->num_bo_read_handles;
|
||||
bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
|
||||
sizeof(u32) * num_read_bo_handles);
|
||||
if (IS_ERR(bo_handles_read))
|
||||
return PTR_ERR(bo_handles_read);
|
||||
|
||||
num_write_bo_handles = wait_info->num_bo_write_handles;
|
||||
bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
|
||||
sizeof(u32) * num_write_bo_handles);
|
||||
if (IS_ERR(bo_handles_write)) {
|
||||
r = PTR_ERR(bo_handles_write);
|
||||
goto free_bo_handles_read;
|
||||
}
|
||||
|
||||
num_syncobj = wait_info->num_syncobj_handles;
|
||||
syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
|
||||
sizeof(u32) * num_syncobj);
|
||||
if (IS_ERR(syncobj_handles)) {
|
||||
r = PTR_ERR(syncobj_handles);
|
||||
goto free_bo_handles_write;
|
||||
}
|
||||
|
||||
num_points = wait_info->num_syncobj_timeline_handles;
|
||||
timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
|
||||
sizeof(u32) * num_points);
|
||||
if (IS_ERR(timeline_handles)) {
|
||||
r = PTR_ERR(timeline_handles);
|
||||
goto free_syncobj_handles;
|
||||
}
|
||||
|
||||
timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
|
||||
sizeof(u32) * num_points);
|
||||
if (IS_ERR(timeline_points)) {
|
||||
r = PTR_ERR(timeline_points);
|
||||
goto free_timeline_handles;
|
||||
}
|
||||
|
||||
gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
|
||||
if (!gobj_read) {
|
||||
r = -ENOMEM;
|
||||
goto free_timeline_points;
|
||||
}
|
||||
|
||||
for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
|
||||
gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
|
||||
if (!gobj_read[rentry]) {
|
||||
r = -ENOENT;
|
||||
goto put_gobj_read;
|
||||
}
|
||||
}
|
||||
|
||||
gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
|
||||
if (!gobj_write) {
|
||||
r = -ENOMEM;
|
||||
goto put_gobj_read;
|
||||
}
|
||||
|
||||
for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
|
||||
gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
|
||||
if (!gobj_write[wentry]) {
|
||||
r = -ENOENT;
|
||||
goto put_gobj_write;
|
||||
}
|
||||
}
|
||||
|
||||
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
|
||||
(num_read_bo_handles + num_write_bo_handles));
|
||||
|
||||
/* Lock all BOs with retry handling */
|
||||
drm_exec_until_all_locked(&exec) {
|
||||
r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
|
||||
drm_exec_retry_on_contention(&exec);
|
||||
if (r) {
|
||||
drm_exec_fini(&exec);
|
||||
goto put_gobj_write;
|
||||
}
|
||||
|
||||
r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
|
||||
drm_exec_retry_on_contention(&exec);
|
||||
if (r) {
|
||||
drm_exec_fini(&exec);
|
||||
goto put_gobj_write;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wait_info->num_fences) {
|
||||
if (num_points) {
|
||||
struct dma_fence_unwrap iter;
|
||||
struct dma_fence *fence;
|
||||
struct dma_fence *f;
|
||||
|
||||
for (i = 0; i < num_points; i++) {
|
||||
r = drm_syncobj_find_fence(filp, timeline_handles[i],
|
||||
timeline_points[i],
|
||||
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
||||
&fence);
|
||||
if (r)
|
||||
goto exec_fini;
|
||||
|
||||
dma_fence_unwrap_for_each(f, &iter, fence)
|
||||
num_fences++;
|
||||
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
}
|
||||
|
||||
/* Count syncobj's fence */
|
||||
for (i = 0; i < num_syncobj; i++) {
|
||||
struct dma_fence *fence;
|
||||
|
||||
r = drm_syncobj_find_fence(filp, syncobj_handles[i],
|
||||
0,
|
||||
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
||||
&fence);
|
||||
if (r)
|
||||
goto exec_fini;
|
||||
|
||||
num_fences++;
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
/* Count GEM objects fence */
|
||||
for (i = 0; i < num_read_bo_handles; i++) {
|
||||
struct dma_resv_iter resv_cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
|
||||
DMA_RESV_USAGE_READ, fence)
|
||||
num_fences++;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_write_bo_handles; i++) {
|
||||
struct dma_resv_iter resv_cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
|
||||
DMA_RESV_USAGE_WRITE, fence)
|
||||
num_fences++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Passing num_fences = 0 means that userspace doesn't want to
|
||||
* retrieve userq_fence_info. If num_fences = 0 we skip filling
|
||||
* userq_fence_info and return the actual number of fences on
|
||||
* args->num_fences.
|
||||
*/
|
||||
wait_info->num_fences = num_fences;
|
||||
} else {
|
||||
/* Array of fence info */
|
||||
fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
|
||||
if (!fence_info) {
|
||||
r = -ENOMEM;
|
||||
goto exec_fini;
|
||||
}
|
||||
|
||||
/* Array of fences */
|
||||
fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
|
||||
if (!fences) {
|
||||
r = -ENOMEM;
|
||||
goto free_fence_info;
|
||||
}
|
||||
|
||||
/* Retrieve GEM read objects fence */
|
||||
for (i = 0; i < num_read_bo_handles; i++) {
|
||||
struct dma_resv_iter resv_cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
|
||||
DMA_RESV_USAGE_READ, fence) {
|
||||
if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
|
||||
r = -EINVAL;
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
fences[num_fences++] = fence;
|
||||
dma_fence_get(fence);
|
||||
}
|
||||
}
|
||||
|
||||
/* Retrieve GEM write objects fence */
|
||||
for (i = 0; i < num_write_bo_handles; i++) {
|
||||
struct dma_resv_iter resv_cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
|
||||
DMA_RESV_USAGE_WRITE, fence) {
|
||||
if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
|
||||
r = -EINVAL;
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
fences[num_fences++] = fence;
|
||||
dma_fence_get(fence);
|
||||
}
|
||||
}
|
||||
|
||||
if (num_points) {
|
||||
struct dma_fence_unwrap iter;
|
||||
struct dma_fence *fence;
|
||||
struct dma_fence *f;
|
||||
|
||||
for (i = 0; i < num_points; i++) {
|
||||
r = drm_syncobj_find_fence(filp, timeline_handles[i],
|
||||
timeline_points[i],
|
||||
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
||||
&fence);
|
||||
if (r)
|
||||
goto free_fences;
|
||||
|
||||
dma_fence_unwrap_for_each(f, &iter, fence) {
|
||||
if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
|
||||
r = -EINVAL;
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
dma_fence_get(f);
|
||||
fences[num_fences++] = f;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
}
|
||||
|
||||
/* Retrieve syncobj's fence */
|
||||
for (i = 0; i < num_syncobj; i++) {
|
||||
struct dma_fence *fence;
|
||||
|
||||
r = drm_syncobj_find_fence(filp, syncobj_handles[i],
|
||||
0,
|
||||
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
|
||||
&fence);
|
||||
if (r)
|
||||
goto free_fences;
|
||||
|
||||
if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
|
||||
r = -EINVAL;
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
fences[num_fences++] = fence;
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep only the latest fences to reduce the number of values
|
||||
* given back to userspace.
|
||||
*/
|
||||
num_fences = dma_fence_dedup_array(fences, num_fences);
|
||||
|
||||
waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
|
||||
if (!waitq)
|
||||
goto free_fences;
|
||||
|
||||
for (i = 0, cnt = 0; i < num_fences; i++) {
|
||||
struct amdgpu_userq_fence_driver *fence_drv;
|
||||
struct amdgpu_userq_fence *userq_fence;
|
||||
u32 index;
|
||||
|
||||
userq_fence = to_amdgpu_userq_fence(fences[i]);
|
||||
if (!userq_fence) {
|
||||
/*
|
||||
* Just waiting on other driver fences should
|
||||
* be good for now
|
||||
*/
|
||||
r = dma_fence_wait(fences[i], true);
|
||||
if (r) {
|
||||
dma_fence_put(fences[i]);
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
dma_fence_put(fences[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
fence_drv = userq_fence->fence_drv;
|
||||
/*
|
||||
* We need to make sure the user queue release their reference
|
||||
* to the fence drivers at some point before queue destruction.
|
||||
* Otherwise, we would gather those references until we don't
|
||||
* have any more space left and crash.
|
||||
*/
|
||||
r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
|
||||
xa_limit_32b, GFP_KERNEL);
|
||||
if (r)
|
||||
goto free_fences;
|
||||
|
||||
amdgpu_userq_fence_driver_get(fence_drv);
|
||||
|
||||
/* Store drm syncobj's gpu va address and value */
|
||||
fence_info[cnt].va = fence_drv->va;
|
||||
fence_info[cnt].value = fences[i]->seqno;
|
||||
|
||||
dma_fence_put(fences[i]);
|
||||
/* Increment the actual userq fence count */
|
||||
cnt++;
|
||||
}
|
||||
|
||||
wait_info->num_fences = cnt;
|
||||
/* Copy userq fence info to user space */
|
||||
if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
|
||||
fence_info, wait_info->num_fences * sizeof(*fence_info))) {
|
||||
r = -EFAULT;
|
||||
goto free_fences;
|
||||
}
|
||||
|
||||
kfree(fences);
|
||||
kfree(fence_info);
|
||||
}
|
||||
|
||||
drm_exec_fini(&exec);
|
||||
for (i = 0; i < num_read_bo_handles; i++)
|
||||
drm_gem_object_put(gobj_read[i]);
|
||||
kfree(gobj_read);
|
||||
|
||||
for (i = 0; i < num_write_bo_handles; i++)
|
||||
drm_gem_object_put(gobj_write[i]);
|
||||
kfree(gobj_write);
|
||||
|
||||
kfree(timeline_points);
|
||||
kfree(timeline_handles);
|
||||
kfree(syncobj_handles);
|
||||
kfree(bo_handles_write);
|
||||
kfree(bo_handles_read);
|
||||
|
||||
return 0;
|
||||
|
||||
free_fences:
|
||||
while (num_fences-- > 0)
|
||||
dma_fence_put(fences[num_fences]);
|
||||
kfree(fences);
|
||||
free_fence_info:
|
||||
kfree(fence_info);
|
||||
exec_fini:
|
||||
drm_exec_fini(&exec);
|
||||
put_gobj_write:
|
||||
while (wentry-- > 0)
|
||||
drm_gem_object_put(gobj_write[wentry]);
|
||||
kfree(gobj_write);
|
||||
put_gobj_read:
|
||||
while (rentry-- > 0)
|
||||
drm_gem_object_put(gobj_read[rentry]);
|
||||
kfree(gobj_read);
|
||||
free_timeline_points:
|
||||
kfree(timeline_points);
|
||||
free_timeline_handles:
|
||||
kfree(timeline_handles);
|
||||
free_syncobj_handles:
|
||||
kfree(syncobj_handles);
|
||||
free_bo_handles_write:
|
||||
kfree(bo_handles_write);
|
||||
free_bo_handles_read:
|
||||
kfree(bo_handles_read);
|
||||
|
||||
return r;
|
||||
}
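
The num_fences comment inside the function above describes a two-pass contract, so a hedged userspace sketch may help (the ioctl macro name and header are assumptions; the field names are taken from the wait_info-> accesses in the code): call once with num_fences == 0 to learn the count, then call again with an out_fences buffer of that size to receive the (va, value) pairs the user queue must poll before proceeding.

/* Hypothetical two-pass usage of the wait ioctl. */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"		/* assumed to carry the userq uapi */

static int example_userq_wait(int drm_fd, uint32_t waitq_id,
			      uint32_t *syncobjs, uint32_t n_syncobjs)
{
	struct drm_amdgpu_userq_wait wait = {
		.waitq_id = waitq_id,
		.syncobj_handles = (uintptr_t)syncobjs,
		.num_syncobj_handles = n_syncobjs,
		.num_fences = 0,	/* pass 1: query how many entries */
	};
	struct drm_amdgpu_userq_fence_info *info;

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait))
		return -1;

	info = calloc(wait.num_fences, sizeof(*info));
	if (!info)
		return -1;

	wait.out_fences = (uintptr_t)info;	/* pass 2: fetch (va, value) pairs */
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait)) {
		free(info);
		return -1;
	}

	/* Each entry says which seq64 address must reach which value. */
	free(info);
	return 0;
}
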
|
||||
76
drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
Normal file
@@ -0,0 +1,76 @@
/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_USERQ_FENCE_H__
#define __AMDGPU_USERQ_FENCE_H__

#include <linux/types.h>

#include "amdgpu_userq.h"

struct amdgpu_userq_fence {
	struct dma_fence base;
	/*
	 * This lock is necessary to synchronize the
	 * userqueue dma fence operations.
	 */
	spinlock_t lock;
	struct list_head link;
	unsigned long fence_drv_array_count;
	struct amdgpu_userq_fence_driver *fence_drv;
	struct amdgpu_userq_fence_driver **fence_drv_array;
};

struct amdgpu_userq_fence_driver {
	struct kref refcount;
	u64 va;
	u64 gpu_addr;
	u64 *cpu_addr;
	u64 context;
	/*
	 * This lock is necessary to synchronize the access
	 * to the fences list by the fence driver.
	 */
	spinlock_t fence_list_lock;
	struct list_head fences;
	struct amdgpu_device *adev;
	char timeline_name[TASK_COMM_LEN];
};

int amdgpu_userq_fence_slab_init(void);
void amdgpu_userq_fence_slab_fini(void);

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_destroy(struct kref *ref);
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp);
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp);

#endif

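The get/put pair declared here follows the usual kref pattern around amdgpu_userq_fence_driver_destroy(); their bodies are not part of this hunk, so the following is only a sketch of the expected semantics (it is this pairing that lets the wait ioctl park a reference in fence_drv_xa and the fence free path drop one later):

/* Sketch of the expected refcount semantics, not the actual bodies. */
void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	/* amdgpu_userq_fence_driver_destroy() runs on the final put. */
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}
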
@@ -353,9 +353,9 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)

	cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);

-	/* err_event_athub will corrupt VCPU buffer, so we need to
+	/* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
	 * restore fw data and clear buffer in amdgpu_vcn_resume() */
-	if (in_ras_intr)
+	if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
		return 0;

	return amdgpu_vcn_save_vcpu_bo_inst(adev, i);

@@ -66,7 +66,6 @@
|
||||
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
|
||||
|
||||
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
|
||||
#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
|
||||
#define VCN_VID_IP_ADDRESS_2_0 0x0
|
||||
#define VCN_AON_IP_ADDRESS_2_0 0x30000
|
||||
|
||||
|
||||
@@ -1323,6 +1323,9 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo
|
||||
{
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (!virt->ops || !virt->ops->req_ras_err_count)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
|
||||
* will ignore incoming guest messages. Ratelimit the guest messages to
|
||||
* prevent guest self DOS.
|
||||
@@ -1378,14 +1381,16 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,

	used_size = host_telemetry->header.used_size;

	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
-		return 0;
+		return -EINVAL;

	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
	if (!cper_dump)
		return -ENOMEM;

-	if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0))
+	if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+		ret = -EINVAL;
		goto out;
+	}

	*more = cper_dump->more;

@@ -1425,7 +1430,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
	int ret = 0;
	uint32_t more = 0;

-	if (!amdgpu_sriov_ras_cper_en(adev))
+	if (!virt->ops || !virt->ops->req_ras_cper_dump)
		return -EOPNOTSUPP;

	do {
@@ -1434,7 +1439,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
			adev, virt->fw_reserve.ras_telemetry, &more);
		else
			ret = 0;
-	} while (more);
+	} while (more && !ret);

	return ret;
}

@@ -1444,6 +1449,9 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
int ret = 0;
|
||||
|
||||
if (!amdgpu_sriov_ras_cper_en(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
|
||||
down_read_trylock(&adev->reset_domain->sem)) {
|
||||
mutex_lock(&virt->ras.ras_telemetry_mutex);
|
||||
@@ -1480,3 +1488,16 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_virt_request_bad_pages() - request bad pages
|
||||
* @adev: amdgpu device.
|
||||
* Send command to GPU hypervisor to write new bad pages into the shared PF2VF region
|
||||
*/
|
||||
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (virt->ops && virt->ops->req_bad_pages)
|
||||
virt->ops->req_bad_pages(adev);
|
||||
}
|
||||
|
||||
@@ -97,6 +97,7 @@ struct amdgpu_virt_ops {
|
||||
bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
|
||||
int (*req_ras_err_count)(struct amdgpu_device *adev);
|
||||
int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
|
||||
int (*req_bad_pages)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -146,11 +147,13 @@ enum AMDGIM_FEATURE_FLAG {
|
||||
|
||||
enum AMDGIM_REG_ACCESS_FLAG {
|
||||
/* Use PSP to program IH_RB_CNTL */
|
||||
AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
|
||||
AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
|
||||
/* Use RLC to program MMHUB regs */
|
||||
AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
|
||||
AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
|
||||
/* Use RLC to program GC regs */
|
||||
AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
|
||||
AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
|
||||
/* Use PSP to program L1_TLB_CNTL*/
|
||||
AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
|
||||
};
|
||||
|
||||
struct amdgim_pf2vf_info_v1 {
|
||||
@@ -260,7 +263,10 @@ struct amdgpu_virt {
|
||||
uint32_t reg_val_offs;
|
||||
struct amdgpu_irq_src ack_irq;
|
||||
struct amdgpu_irq_src rcv_irq;
|
||||
|
||||
struct work_struct flr_work;
|
||||
struct work_struct bad_pages_work;
|
||||
|
||||
struct amdgpu_mm_table mm_table;
|
||||
const struct amdgpu_virt_ops *ops;
|
||||
struct amdgpu_vf_error_buffer vf_errors;
|
||||
@@ -330,6 +336,10 @@ struct amdgpu_video_codec_info;
|
||||
(amdgpu_sriov_vf((adev)) && \
|
||||
((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
|
||||
|
||||
#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
|
||||
(amdgpu_sriov_vf((adev)) && \
|
||||
((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
|
||||
|
||||
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
|
||||
(amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
|
||||
|
||||
@@ -423,4 +433,5 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
|
||||
int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
|
||||
enum amdgpu_ras_block block);
|
||||
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
|
||||
#endif
|
||||
|
||||
@@ -787,7 +787,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
|
||||
ring->funcs->emit_wreg;
|
||||
|
||||
cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
|
||||
cleaner_shader_needed = job->run_cleaner_shader &&
|
||||
adev->gfx.enable_cleaner_shader &&
|
||||
ring->funcs->emit_cleaner_shader && job->base.s_fence &&
|
||||
&job->base.s_fence->scheduled == isolation->spearhead;
|
||||
|
||||
@@ -817,7 +818,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
|
||||
adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
|
||||
|
||||
if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
|
||||
if (ring->funcs->emit_gds_switch &&
|
||||
gds_switch_needed) {
|
||||
amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
|
||||
job->gds_size, job->gws_base,
|
||||
|
||||
@@ -296,15 +296,27 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {

static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
{
-	const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070;
-	const int xgmi_inst = 2;
-	u32 link_inst;
+	const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+	const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+							0x11b00070 };
+	u32 i, n;
	u64 addr;

-	link_inst = global_link_num % xgmi_inst;
+	switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+	case IP_VERSION(6, 4, 0):
+		n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+		addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+		break;
+	case IP_VERSION(6, 4, 1):
+		n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+		addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+		break;
+	default:
+		return U32_MAX;
+	}

-	addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) +
-		adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst);
+	i = global_link_num / n;
+	addr += adev->asic_funcs->encode_ext_smn_addressing(i);

	return RREG32_PCIE_EXT(addr);
}

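A worked example of the new addressing, to make the indexing concrete (illustrative only, using the XGMI 6.4.0 table from the hunk above): an even/odd global link number picks the register within the table, and the link-pair index feeds the extended SMN encoding.

/* Worked example for XGMI 6.4.0, illustrative only. */
static inline u32 example_hist1_base(int global_link_num)
{
	static const u32 hist1_table[2] = { 0x11a00070, 0x11b00070 };
	int n = 2;	/* ARRAY_SIZE(hist1_table) */

	/* Link 5 -> hist1_table[1] = 0x11b00070; the driver then adds
	 * encode_ext_smn_addressing(5 / 2), i.e. the offset for instance 2. */
	return hist1_table[global_link_num % n];
}
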
@@ -109,10 +109,11 @@ union amd_sriov_msg_feature_flags {
|
||||
|
||||
union amd_sriov_reg_access_flags {
|
||||
struct {
|
||||
uint32_t vf_reg_access_ih : 1;
|
||||
uint32_t vf_reg_access_mmhub : 1;
|
||||
uint32_t vf_reg_access_gc : 1;
|
||||
uint32_t reserved : 29;
|
||||
uint32_t vf_reg_access_ih : 1;
|
||||
uint32_t vf_reg_access_mmhub : 1;
|
||||
uint32_t vf_reg_access_gc : 1;
|
||||
uint32_t vf_reg_access_l1_tlb_cntl : 1;
|
||||
uint32_t reserved : 28;
|
||||
} flags;
|
||||
uint32_t all;
|
||||
};
|
||||
@@ -330,6 +331,7 @@ enum amd_sriov_mailbox_request_message {
|
||||
MB_REQ_MSG_RAS_POISON = 202,
|
||||
MB_REQ_RAS_ERROR_COUNT = 203,
|
||||
MB_REQ_RAS_CPER_DUMP = 204,
|
||||
MB_REQ_RAS_BAD_PAGES = 205,
|
||||
};
|
||||
|
||||
/* mailbox message send from host to guest */
|
||||
@@ -347,6 +349,9 @@ enum amd_sriov_mailbox_response_message {
|
||||
MB_RES_MSG_GPU_RMA = 10,
|
||||
MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
|
||||
MB_REQ_RAS_CPER_DUMP_READY = 14,
|
||||
MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
|
||||
MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
|
||||
MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
|
||||
MB_RES_MSG_TEXT_MESSAGE = 255
|
||||
};
|
||||
|
||||
|
||||
@@ -1444,6 +1444,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
|
||||
if (vbios_str == NULL)
|
||||
vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
|
||||
}
|
||||
OPTIMIZER_HIDE_VAR(vbios_str);
|
||||
if (vbios_str != NULL && *vbios_str == 0)
|
||||
vbios_str++;
|
||||
|
||||
|
||||
@@ -56,6 +56,8 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
|
||||
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
|
||||
static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
|
||||
|
||||
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
|
||||
MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin");
|
||||
@@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
|
||||
MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
|
||||
MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
|
||||
|
||||
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
|
||||
|
||||
|
||||
static void cik_sdma_free_microcode(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
@@ -993,14 +992,9 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
r = cik_sdma_start(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return r;
|
||||
return cik_sdma_start(adev);
|
||||
}
|
||||
|
||||
static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
@@ -1040,14 +1034,10 @@ static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
|
||||
static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
unsigned i;
|
||||
u32 tmp;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
|
||||
SRBM_STATUS2__SDMA1_BUSY_MASK);
|
||||
|
||||
if (!tmp)
|
||||
if (cik_sdma_is_idle(ip_block))
|
||||
return 0;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
@@ -60,9 +60,6 @@
|
||||
#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
|
||||
#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
|
||||
|
||||
#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
|
||||
#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
|
||||
|
||||
#define PIPEID(x) ((x) << 0)
|
||||
#define MEID(x) ((x) << 2)
|
||||
#define VMID(x) ((x) << 4)
|
||||
|
||||
@@ -3075,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3227,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -3206,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3358,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3488,8 +3488,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
|
||||
.set_powergating_state = dce_v11_0_set_powergating_state,
|
||||
};
|
||||
|
||||
static void
|
||||
dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
|
||||
static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
|
||||
struct drm_display_mode *mode,
|
||||
struct drm_display_mode *adjusted_mode)
|
||||
{
|
||||
|
||||
@@ -287,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -412,7 +412,7 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
|
||||
{
|
||||
if (!render)
|
||||
WREG32(mmVGA_RENDER_CONTROL,
|
||||
RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
|
||||
RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
|
||||
}
|
||||
|
||||
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
|
||||
@@ -1011,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
|
||||
/* select wm A */
|
||||
arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
|
||||
tmp = arb_control3;
|
||||
tmp &= ~LATENCY_WATERMARK_MASK(3);
|
||||
tmp |= LATENCY_WATERMARK_MASK(1);
|
||||
tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
|
||||
tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
|
||||
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
|
||||
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
|
||||
(line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
|
||||
/* select wm B */
|
||||
tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
|
||||
tmp &= ~LATENCY_WATERMARK_MASK(3);
|
||||
tmp |= LATENCY_WATERMARK_MASK(2);
|
||||
tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
|
||||
tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
|
||||
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
|
||||
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
|
||||
@@ -1089,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
|
||||
DC_LB_MEMORY_CONFIG(tmp));
|
||||
(tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
|
||||
|
||||
WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
|
||||
(buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
|
||||
@@ -1306,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
|
||||
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
|
||||
u32 offset;
|
||||
struct drm_connector *connector;
|
||||
struct drm_connector_list_iter iter;
|
||||
struct amdgpu_connector *amdgpu_connector = NULL;
|
||||
@@ -1327,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
||||
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
|
||||
};
|
||||
|
||||
if (!dig || !dig->afmt || !dig->afmt->pin)
|
||||
return;
|
||||
|
||||
offset = dig->afmt->pin->offset;
|
||||
|
||||
drm_connector_list_iter_begin(dev, &iter);
|
||||
drm_for_each_connector_iter(connector, &iter) {
|
||||
if (connector->encoder == encoder) {
|
||||
@@ -1348,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
|
||||
u32 tmp = 0;
|
||||
u32 value = 0;
|
||||
u8 stereo_freqs = 0;
|
||||
int max_channels = -1;
|
||||
int j;
|
||||
@@ -1358,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
||||
|
||||
if (sad->format == eld_reg_to_type[i][1]) {
|
||||
if (sad->channels > max_channels) {
|
||||
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
|
||||
MAX_CHANNELS, sad->channels);
|
||||
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
|
||||
DESCRIPTOR_BYTE_2, sad->byte2);
|
||||
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
|
||||
SUPPORTED_FREQUENCIES, sad->freq);
|
||||
value = (sad->channels <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
|
||||
(sad->byte2 <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
|
||||
(sad->freq <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
|
||||
max_channels = sad->channels;
|
||||
}
|
||||
|
||||
@@ -1374,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
||||
}
|
||||
}
|
||||
|
||||
tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
|
||||
SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
|
||||
WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
|
||||
value |= (stereo_freqs <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
|
||||
|
||||
WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
|
||||
}
|
||||
|
||||
kfree(sads);
|
||||
|
||||
}
|
||||
|
||||
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
|
||||
@@ -1886,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
|
||||
struct amdgpu_bo *abo;
|
||||
uint64_t fb_location, tiling_flags;
|
||||
uint32_t fb_format, fb_pitch_pixels, pipe_config;
|
||||
u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
|
||||
u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
u32 viewport_w, viewport_h;
|
||||
int r;
|
||||
bool bypass_lut = false;
|
||||
@@ -1926,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
|
||||
|
||||
switch (target_fb->format->format) {
|
||||
case DRM_FORMAT_C8:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_INDEXED));
|
||||
fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
break;
|
||||
case DRM_FORMAT_XRGB4444:
|
||||
case DRM_FORMAT_ARGB4444:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
|
||||
fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
|
||||
fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
case DRM_FORMAT_XRGB1555:
|
||||
case DRM_FORMAT_ARGB1555:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
|
||||
fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
|
||||
fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
case DRM_FORMAT_BGRX5551:
|
||||
case DRM_FORMAT_BGRA5551:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
|
||||
fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
|
||||
fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
case DRM_FORMAT_RGB565:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB565));
|
||||
fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
|
||||
fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
case DRM_FORMAT_XRGB8888:
|
||||
case DRM_FORMAT_ARGB8888:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
|
||||
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
|
||||
fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
case DRM_FORMAT_XRGB2101010:
|
||||
case DRM_FORMAT_ARGB2101010:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
|
||||
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
|
||||
fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
|
||||
bypass_lut = true;
|
||||
break;
|
||||
case DRM_FORMAT_BGRX1010102:
|
||||
case DRM_FORMAT_BGRA1010102:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
|
||||
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
|
||||
fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
|
||||
bypass_lut = true;
|
||||
break;
|
||||
case DRM_FORMAT_XBGR8888:
|
||||
case DRM_FORMAT_ABGR8888:
|
||||
fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
|
||||
GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
|
||||
fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
|
||||
GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
|
||||
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
|
||||
(GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
|
||||
fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
@@ -2013,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
|
||||
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
|
||||
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
|
||||
|
||||
fb_format |= GRPH_NUM_BANKS(num_banks);
|
||||
fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
|
||||
fb_format |= GRPH_TILE_SPLIT(tile_split);
|
||||
fb_format |= GRPH_BANK_WIDTH(bankw);
|
||||
fb_format |= GRPH_BANK_HEIGHT(bankh);
|
||||
fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
|
||||
fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
|
||||
fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
|
||||
fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
|
||||
fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
|
||||
fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
|
||||
fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
|
||||
} else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
|
||||
fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
|
||||
fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
|
||||
}
|
||||
|
||||
pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
|
||||
fb_format |= GRPH_PIPE_CONFIG(pipe_config);
|
||||
fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
|
||||
|
||||
dce_v6_0_vga_enable(crtc, false);
|
||||
|
||||
@@ -2040,7 +2046,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
(u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
(u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
|
||||
(u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
|
||||
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
|
||||
WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
|
||||
|
||||
@@ -2108,14 +2114,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
|
||||
|
||||
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
|
||||
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
|
||||
INTERLEAVE_EN);
|
||||
DATA_FORMAT__INTERLEAVE_EN_MASK);
|
||||
else
|
||||
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
|
||||
}
|
||||
|
||||
static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
|
||||
{
|
||||
|
||||
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
|
||||
struct drm_device *dev = crtc->dev;
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
@@ -2125,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
|
||||
DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
|
||||
|
||||
WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
|
||||
(0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
|
||||
((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
|
||||
(INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
|
||||
WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
|
||||
WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
|
||||
WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
|
||||
(0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
|
||||
((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
|
||||
(INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
|
||||
|
||||
WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
|
||||
|
||||
@@ -2160,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
|
||||
}
|
||||
|
||||
WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
|
||||
(0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
|
||||
ICON_DEGAMMA_MODE(0) |
|
||||
(0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
|
||||
((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
|
||||
(DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
|
||||
(DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
|
||||
(DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
|
||||
WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
|
||||
(0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
|
||||
((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
|
||||
(GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
|
||||
WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
|
||||
(0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
|
||||
((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
|
||||
(REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
|
||||
WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
|
||||
(0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
|
||||
((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
|
||||
(OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
|
||||
/* XXX match this to the depth of the crtc fmt block, move to modeset? */
|
||||
WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
|
||||
|
||||
@@ -2267,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
|
||||
WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
|
||||
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
|
||||
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
|
||||
@@ -2285,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
|
||||
CUR_CONTROL__CURSOR_EN_MASK |
|
||||
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
|
||||
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
|
||||
|
||||
}
|
||||
|
||||
static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
|
||||
@@ -2596,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
|
||||
const struct drm_display_mode *mode,
|
||||
struct drm_display_mode *adjusted_mode)
|
||||
{
|
||||
|
||||
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
|
||||
struct drm_device *dev = crtc->dev;
|
||||
struct drm_encoder *encoder;
|
||||
@@ -2669,7 +2670,7 @@ static void dce_v6_0_panic_flush(struct drm_plane *plane)
|
||||
|
||||
/* Disable DC tiling */
|
||||
fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
|
||||
fb_format &= ~GRPH_ARRAY_MODE(0x7);
|
||||
fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
|
||||
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
|
||||
|
||||
}
|
||||
@@ -2745,7 +2746,6 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
|
||||
static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
int r, i;
|
||||
bool ret;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
for (i = 0; i < adev->mode_info.num_crtc; i++) {
|
||||
@@ -2789,8 +2789,7 @@ static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
return r;
|
||||
}
|
||||
|
||||
ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
|
||||
if (ret)
|
||||
if (amdgpu_atombios_get_connector_info_from_object_table(adev))
|
||||
amdgpu_display_print_display_setup(adev_to_drm(adev));
|
||||
else
|
||||
return -EINVAL;
|
||||
@@ -2986,12 +2985,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
|
||||
switch (state) {
|
||||
case AMDGPU_IRQ_STATE_DISABLE:
|
||||
interrupt_mask = RREG32(mmINT_MASK + reg_block);
|
||||
interrupt_mask &= ~VBLANK_INT_MASK;
|
||||
interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
|
||||
WREG32(mmINT_MASK + reg_block, interrupt_mask);
|
||||
break;
|
||||
case AMDGPU_IRQ_STATE_ENABLE:
|
||||
interrupt_mask = RREG32(mmINT_MASK + reg_block);
|
||||
interrupt_mask |= VBLANK_INT_MASK;
|
||||
interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
|
||||
WREG32(mmINT_MASK + reg_block, interrupt_mask);
|
||||
break;
|
||||
default:
|
||||
@@ -3006,28 +3005,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
|
||||
|
||||
}
|
||||
|
||||
static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
unsigned hpd,
|
||||
enum amdgpu_interrupt_state state)
|
||||
{
|
||||
u32 dc_hpd_int_cntl;
|
||||
|
||||
if (type >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", type);
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (state) {
|
||||
case AMDGPU_IRQ_STATE_DISABLE:
|
||||
dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
|
||||
dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
|
||||
dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
|
||||
break;
|
||||
case AMDGPU_IRQ_STATE_ENABLE:
|
||||
dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
|
||||
dc_hpd_int_cntl |= DC_HPDx_INT_EN;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
|
||||
dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
|
||||
dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
|
||||
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -3036,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
@@ -3096,7 +3095,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
|
||||
switch (entry->src_data[0]) {
|
||||
case 0: /* vblank */
|
||||
if (disp_int & interrupt_status_offsets[crtc].vblank)
|
||||
WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
|
||||
WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
|
||||
else
|
||||
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
|
||||
|
||||
@@ -3107,7 +3106,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
|
||||
break;
|
||||
case 1: /* vline */
|
||||
if (disp_int & interrupt_status_offsets[crtc].vline)
|
||||
WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
|
||||
WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
|
||||
else
|
||||
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
|
||||
|
||||
@@ -3121,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
@@ -3172,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
|
||||
|
||||
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
|
||||
works = amdgpu_crtc->pflip_works;
|
||||
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
|
||||
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
|
||||
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
|
||||
"AMDGPU_FLIP_SUBMITTED(%d)\n",
|
||||
amdgpu_crtc->pflip_status,
|
||||
@@ -3249,12 +3248,10 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
|
||||
.set_powergating_state = dce_v6_0_set_powergating_state,
|
||||
};
|
||||
|
||||
static void
|
||||
dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
|
||||
static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
|
||||
struct drm_display_mode *mode,
|
||||
struct drm_display_mode *adjusted_mode)
|
||||
{
|
||||
|
||||
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
|
||||
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
|
||||
|
||||
@@ -3274,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
|
||||
|
||||
static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
|
||||
{
|
||||
|
||||
struct amdgpu_device *adev = drm_to_adev(encoder->dev);
|
||||
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
|
||||
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
|
||||
@@ -3314,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
|
||||
|
||||
static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
|
||||
{
|
||||
|
||||
struct drm_device *dev = encoder->dev;
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
|
||||
@@ -3325,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
|
||||
|
||||
static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
|
||||
{
|
||||
|
||||
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
|
||||
struct amdgpu_encoder_atom_dig *dig;
|
||||
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
|
||||
@@ -3541,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
|
||||
.set = dce_v6_0_set_crtc_interrupt_state,
|
||||
.set = dce_v6_0_set_crtc_irq_state,
|
||||
.process = dce_v6_0_crtc_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
|
||||
.set = dce_v6_0_set_pageflip_interrupt_state,
|
||||
.set = dce_v6_0_set_pageflip_irq_state,
|
||||
.process = dce_v6_0_pageflip_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
|
||||
.set = dce_v6_0_set_hpd_interrupt_state,
|
||||
.set = dce_v6_0_set_hpd_irq_state,
|
||||
.process = dce_v6_0_hpd_irq,
|
||||
};
|
||||
|
||||
|
||||
@@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
|
||||
u32 tmp;
|
||||
|
||||
if (hpd >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", hpd);
|
||||
DRM_DEBUG("invalid hpd %d\n", hpd);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3021,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
|
||||
}
|
||||
}
|
||||
|
||||
static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
@@ -3029,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
|
||||
u32 dc_hpd_int_cntl;
|
||||
|
||||
if (type >= adev->mode_info.num_hpd) {
|
||||
DRM_DEBUG("invalid hdp %d\n", type);
|
||||
DRM_DEBUG("invalid hpd %d\n", type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3051,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
@@ -3136,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
|
||||
static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
enum amdgpu_interrupt_state state)
|
||||
@@ -3547,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
|
||||
.set = dce_v8_0_set_crtc_interrupt_state,
|
||||
.set = dce_v8_0_set_crtc_irq_state,
|
||||
.process = dce_v8_0_crtc_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
|
||||
.set = dce_v8_0_set_pageflip_interrupt_state,
|
||||
.set = dce_v8_0_set_pageflip_irq_state,
|
||||
.process = dce_v8_0_pageflip_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
|
||||
.set = dce_v8_0_set_hpd_interrupt_state,
|
||||
.set = dce_v8_0_set_hpd_irq_state,
|
||||
.process = dce_v8_0_hpd_irq,
|
||||
};
|
||||
|
||||
|
||||
@@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
|
||||
/* SE status registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
|
||||
@@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
};
|
||||
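The compute-queue list above now ends with eight consecutive mmCP_MEC_ME1_HEADER_DUMP entries, and the gfx-queue list below repeats CP_CE/CP_PFP/CP_ME_HEADER_DUMP in the same way. The working assumption here (not stated in the patch) is that the *_HEADER_DUMP registers return successive buffered packet-header dwords on repeated reads, so listing the same offset eight times lets the generic dump loop capture an eight-dword header history. A minimal model of that access pattern, names illustrative only:

        /* Sketch, assuming read-to-advance semantics for the header-dump window. */
        #define CP_HEADER_DUMP_DEPTH 8

        static void sketch_dump_mec1_headers(struct amdgpu_device *adev, u32 *out)
        {
                int i;

                /* each read is assumed to pop the next buffered header dword */
                for (i = 0; i < CP_HEADER_DUMP_DEPTH; i++)
                        out[i] = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_HEADER_DUMP));
        }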
|
||||
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
|
||||
@@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
|
||||
/* gfx header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
|
||||
@@ -4296,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
|
||||
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
int ctx_reg_offset;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
@@ -4306,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
if (buffer == NULL)
|
||||
return;
|
||||

buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
count = amdgpu_gfx_csb_preamble_start(buffer);
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
buffer[count++] = cpu_to_le32(0x80000000);
buffer[count++] = cpu_to_le32(0x80000000);

for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
for (ext = sect->section; ext->extent != NULL; ++ext) {
if (sect->id == SECT_CONTEXT) {
buffer[count++] =
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
buffer[count++] = cpu_to_le32(ext->reg_index -
PACKET3_SET_CONTEXT_REG_START);
for (i = 0; i < ext->reg_count; i++)
buffer[count++] = cpu_to_le32(ext->extent[i]);
} else {
return;
}
}
}

ctx_reg_offset =
SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
buffer[count++] = cpu_to_le32(0);
amdgpu_gfx_csb_preamble_end(buffer, count);
}
|
||||
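The refactor above swaps the open-coded clear-state preamble for the shared amdgpu_gfx_csb_preamble_start()/amdgpu_gfx_csb_data_parser()/amdgpu_gfx_csb_preamble_end() helpers (gfx_v11_0_get_csb_buffer() below gets the same treatment). As a reading aid, here is a sketch of how those helpers map onto the deleted lines; the real bodies live elsewhere in amdgpu, so prototypes and return types are assumed:

        /* Reconstructed from the lines removed above; not the canonical bodies. */
        static u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
        {
                u32 count = 0;

                buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
                buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
                buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
                buffer[count++] = cpu_to_le32(0x80000000);
                buffer[count++] = cpu_to_le32(0x80000000);

                return count;
        }

        static u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev,
                                              volatile u32 *buffer, u32 count)
        {
                const struct cs_section_def *sect;
                const struct cs_extent_def *ext;
                u32 i;

                /* emit SET_CONTEXT_REG packets for every context section */
                for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                        for (ext = sect->section; ext->extent != NULL; ++ext) {
                                if (sect->id != SECT_CONTEXT)
                                        return count; /* original code bailed out here */
                                buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG,
                                                                      ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                              PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        }
                }

                return count;
        }

        static void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count)
        {
                buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
                buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
                buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
                buffer[count++] = cpu_to_le32(0);
        }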
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
|
||||
@@ -4752,6 +4755,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
int i, j, k, r, ring_id = 0;
|
||||
int xcc_id = 0;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
|
||||
|
||||
INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
|
||||
|
||||
@@ -4763,7 +4767,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
case IP_VERSION(10, 1, 4):
|
||||
adev->gfx.me.num_me = 1;
|
||||
adev->gfx.me.num_pipe_per_me = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 8;
|
||||
adev->gfx.mec.num_mec = 2;
|
||||
adev->gfx.mec.num_pipe_per_mec = 4;
|
||||
adev->gfx.mec.num_queue_per_pipe = 8;
|
||||
@@ -4778,7 +4782,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
case IP_VERSION(10, 3, 7):
|
||||
adev->gfx.me.num_me = 1;
|
||||
adev->gfx.me.num_pipe_per_me = 2;
|
||||
adev->gfx.me.num_queue_per_pipe = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 2;
|
||||
adev->gfx.mec.num_mec = 2;
|
||||
adev->gfx.mec.num_pipe_per_mec = 4;
|
||||
adev->gfx.mec.num_queue_per_pipe = 4;
|
||||
@@ -4800,7 +4804,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 101 &&
|
||||
adev->gfx.pfp_fw_version >= 158 &&
|
||||
adev->gfx.mec_fw_version >= 152) {
|
||||
adev->gfx.mec_fw_version >= 151) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
@@ -4810,7 +4814,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(10, 3, 0):
|
||||
case IP_VERSION(10, 3, 1):
|
||||
case IP_VERSION(10, 3, 2):
|
||||
case IP_VERSION(10, 3, 3):
|
||||
case IP_VERSION(10, 3, 4):
|
||||
case IP_VERSION(10, 3, 5):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
|
||||
@@ -4826,6 +4832,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(10, 3, 6):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 14 &&
|
||||
adev->gfx.pfp_fw_version >= 17 &&
|
||||
adev->gfx.mec_fw_version >= 24) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(10, 3, 7):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 4 &&
|
||||
adev->gfx.pfp_fw_version >= 9 &&
|
||||
adev->gfx.mec_fw_version >= 12) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
break;
|
||||
@@ -4886,7 +4920,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
/* set up the gfx ring */
|
||||
for (i = 0; i < adev->gfx.me.num_me; i++) {
|
||||
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
|
||||
for (j = 0; j < num_queue_per_pipe; j++) {
|
||||
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
|
||||
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
|
||||
continue;
|
||||
@@ -9645,9 +9679,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_cp_reg_list_10[reg].reg_name,
adev->gfx.ip_dump_compute_queues[index + reg]);
if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
drm_printf(p, "%-50s \t 0x%08x\n",
"mmCP_MEC_ME2_HEADER_DUMP",
adev->gfx.ip_dump_compute_queues[index + reg]);
else
drm_printf(p, "%-50s \t 0x%08x\n",
gc_cp_reg_list_10[reg].reg_name,
adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
|
||||
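The only functional change in the ip_print hunk above (and in the matching ip_dump hunk that follows) is the i && reg_offset == mmCP_MEC_ME1_HEADER_DUMP test: the register list carries ME1 header-dump entries only, so when the second MEC engine is dumped the read is redirected to CP_MEC_ME2_HEADER_DUMP and the printed name is swapped accordingly. If that relabeling were factored out, it could look like the hypothetical helper below (illustrative, not part of the patch):

        /* Hypothetical helper mirroring the ME1/ME2 special case above. */
        static const char *mec_header_dump_name(int mec,
                                                const struct amdgpu_hwip_reg_entry *entry)
        {
                /* ME1 entries stand in for ME2 when the second engine is printed */
                if (mec && entry->reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
                        return "mmCP_MEC_ME2_HEADER_DUMP";

                return entry->reg_name;
        }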
@@ -9708,9 +9747,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
|
||||
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_10[reg]));
|
||||
if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
|
||||
else
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_10[reg]));
|
||||
}
|
||||
index += reg_count;
|
||||
}
|
||||
|
||||
@@ -48,6 +48,8 @@
|
||||
#include "gfx_v11_0_3.h"
|
||||
#include "nbio_v4_3.h"
|
||||
#include "mes_v11_0.h"
|
||||
#include "mes_userqueue.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
|
||||
#define GFX11_NUM_GFX_RINGS 1
|
||||
#define GFX11_MEC_HPD_SIZE 2048
|
||||
@@ -177,9 +179,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
/* SE status registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
|
||||
@@ -230,7 +236,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
};
|
||||
|
||||
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
|
||||
@@ -259,7 +274,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
|
||||
@@ -580,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
uint32_t padding, offset;
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
|
||||
padding = amdgpu_mes_ctx_get_offs(ring,
|
||||
AMDGPU_MES_CTX_PADDING_OFFS);
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
|
||||
cpu_ptr = &adev->wb.wb[index];
|
||||
|
||||
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
|
||||
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
|
||||
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
|
||||
*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
|
||||
} else {
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
|
||||
cpu_ptr = &adev->wb.wb[index];
|
||||
|
||||
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
|
||||
@@ -633,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
||||
else
|
||||
r = -EINVAL;
|
||||
err2:
|
||||
if (!ring->is_mes_queue)
|
||||
amdgpu_ib_free(&ib, NULL);
|
||||
amdgpu_ib_free(&ib, NULL);
|
||||
dma_fence_put(f);
|
||||
err1:
|
||||
if (!ring->is_mes_queue)
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -833,9 +848,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
|
||||
static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
int ctx_reg_offset;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
@@ -843,39 +856,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
count = amdgpu_gfx_csb_preamble_start(buffer);
|
||||
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] =
|
||||
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index -
|
||||
PACKET3_SET_CONTEXT_REG_START);
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx_reg_offset =
|
||||
SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
|
||||
ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
buffer[count++] = cpu_to_le32(ctx_reg_offset);
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
amdgpu_gfx_csb_preamble_end(buffer, count);
|
||||
}
|
||||
|
||||
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
|
||||
@@ -1056,14 +1045,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
|
||||
#define MQD_FWWORKAREA_SIZE 484
|
||||
#define MQD_FWWORKAREA_ALIGNMENT 256
|
||||
|
||||
static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
|
||||
static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
|
||||
struct amdgpu_gfx_shadow_info *shadow_info)
|
||||
{
|
||||
if (adev->gfx.cp_gfx_shadow) {
|
||||
shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
|
||||
shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
|
||||
shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
|
||||
shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
|
||||
shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
|
||||
shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
|
||||
shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
|
||||
shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
|
||||
struct amdgpu_gfx_shadow_info *shadow_info,
|
||||
bool skip_check)
|
||||
{
|
||||
if (adev->gfx.cp_gfx_shadow || skip_check) {
|
||||
gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
|
||||
return 0;
|
||||
} else {
|
||||
memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
|
||||
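Read together, the interleaved old and new lines above split the shadow-buffer query into an unconditional setter plus a wrapper that takes a skip_check flag, so callers that only need the sizes (the user-queue path, presumably) can bypass the cp_gfx_shadow capability check. A consolidated view of the assumed end result:

        /* Assumed final shape after the hunk above; the error return is guessed. */
        static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
                                                          struct amdgpu_gfx_shadow_info *shadow_info)
        {
                shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
                shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
                shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
                shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
        }

        static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
                                                 struct amdgpu_gfx_shadow_info *shadow_info,
                                                 bool skip_check)
        {
                if (adev->gfx.cp_gfx_shadow || skip_check) {
                        gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
                        return 0;
                }

                memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
                return -EINVAL; /* assumption: the hunk cuts off before the real error code */
        }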
@@ -1136,6 +1132,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
|
||||
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
if (adev->gfx.disable_kq) {
|
||||
ring->no_scheduler = true;
|
||||
ring->no_user_submission = true;
|
||||
}
|
||||
|
||||
if (!ring_id)
|
||||
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
|
||||
@@ -1568,24 +1568,18 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
|
||||
|
||||
static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
int i, j, k, r, ring_id = 0;
|
||||
int i, j, k, r, ring_id;
|
||||
int xcc_id = 0;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
|
||||
|
||||
INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
|
||||
|
||||
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
|
||||
case IP_VERSION(11, 0, 0):
|
||||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
case IP_VERSION(11, 0, 3):
|
||||
adev->gfx.me.num_me = 1;
|
||||
adev->gfx.me.num_pipe_per_me = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 1;
|
||||
adev->gfx.mec.num_mec = 1;
|
||||
adev->gfx.mec.num_pipe_per_mec = 4;
|
||||
adev->gfx.mec.num_queue_per_pipe = 4;
|
||||
break;
|
||||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
case IP_VERSION(11, 5, 0):
|
||||
case IP_VERSION(11, 5, 1):
|
||||
@@ -1593,7 +1587,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
case IP_VERSION(11, 5, 3):
|
||||
adev->gfx.me.num_me = 1;
|
||||
adev->gfx.me.num_pipe_per_me = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 1;
|
||||
adev->gfx.me.num_queue_per_pipe = 2;
|
||||
adev->gfx.mec.num_mec = 1;
|
||||
adev->gfx.mec.num_pipe_per_mec = 4;
|
||||
adev->gfx.mec.num_queue_per_pipe = 4;
|
||||
@@ -1608,6 +1602,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
break;
|
||||
}
|
||||
|
||||
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
|
||||
case IP_VERSION(11, 0, 0):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
case IP_VERSION(11, 0, 3):
|
||||
if (!adev->gfx.disable_uq &&
|
||||
adev->gfx.me_fw_version >= 2390 &&
|
||||
adev->gfx.pfp_fw_version >= 2530 &&
|
||||
adev->gfx.mec_fw_version >= 2600 &&
|
||||
adev->mes.fw_version[0] >= 120) {
|
||||
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
|
||||
adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(11, 0, 1):
|
||||
case IP_VERSION(11, 0, 4):
|
||||
case IP_VERSION(11, 5, 0):
|
||||
case IP_VERSION(11, 5, 1):
|
||||
case IP_VERSION(11, 5, 2):
|
||||
case IP_VERSION(11, 5, 3):
|
||||
/* add firmware version checks here */
|
||||
if (0 && !adev->gfx.disable_uq) {
|
||||
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
|
||||
adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
|
||||
case IP_VERSION(11, 0, 0):
|
||||
case IP_VERSION(11, 0, 2):
|
||||
@@ -1640,6 +1663,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(11, 5, 2):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 12 &&
|
||||
adev->gfx.pfp_fw_version >= 15 &&
|
||||
adev->gfx.mec_fw_version >= 15) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case IP_VERSION(11, 5, 3):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 7 &&
|
||||
adev->gfx.pfp_fw_version >= 8 &&
|
||||
adev->gfx.mec_fw_version >= 8) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
break;
|
||||
@@ -1701,37 +1752,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the gfx ring */
|
||||
for (i = 0; i < adev->gfx.me.num_me; i++) {
|
||||
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
|
||||
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
|
||||
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
|
||||
continue;
|
||||
if (adev->gfx.num_gfx_rings) {
|
||||
ring_id = 0;
|
||||
/* set up the gfx ring */
|
||||
for (i = 0; i < adev->gfx.me.num_me; i++) {
|
||||
for (j = 0; j < num_queue_per_pipe; j++) {
|
||||
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
|
||||
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
|
||||
continue;
|
||||
|
||||
r = gfx_v11_0_gfx_ring_init(adev, ring_id,
|
||||
i, k, j);
|
||||
if (r)
|
||||
return r;
|
||||
ring_id++;
|
||||
r = gfx_v11_0_gfx_ring_init(adev, ring_id,
|
||||
i, k, j);
|
||||
if (r)
|
||||
return r;
|
||||
ring_id++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ring_id = 0;
|
||||
/* set up the compute queues - allocate horizontally across pipes */
|
||||
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
|
||||
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
|
||||
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
|
||||
if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
|
||||
k, j))
|
||||
continue;
|
||||
if (adev->gfx.num_compute_rings) {
|
||||
ring_id = 0;
|
||||
/* set up the compute queues - allocate horizontally across pipes */
|
||||
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
|
||||
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
|
||||
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
|
||||
if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
|
||||
k, j))
|
||||
continue;
|
||||
|
||||
r = gfx_v11_0_compute_ring_init(adev, ring_id,
|
||||
i, k, j);
|
||||
if (r)
|
||||
return r;
|
||||
r = gfx_v11_0_compute_ring_init(adev, ring_id,
|
||||
i, k, j);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
ring_id++;
|
||||
ring_id++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4061,6 +4117,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
#ifdef __BIG_ENDIAN
|
||||
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
|
||||
#endif
|
||||
if (prop->tmz_queue)
|
||||
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
|
||||
mqd->cp_gfx_hqd_cntl = tmp;
|
||||
|
||||
/* set up cp_doorbell_control */
|
||||
@@ -4081,6 +4139,16 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
/* active the queue */
|
||||
mqd->cp_gfx_hqd_active = 1;
|
||||
|
||||
/* set gfx UQ items */
|
||||
mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
|
||||
mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
|
||||
mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
|
||||
mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
|
||||
mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
|
||||
mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
|
||||
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
|
||||
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4205,6 +4273,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
prop->allow_tunneling);
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
|
||||
if (prop->tmz_queue)
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
|
||||
mqd->cp_hqd_pq_control = tmp;
|
||||
|
||||
/* set the wb address whether it's enabled or not */
|
||||
@@ -4256,6 +4326,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
|
||||
mqd->cp_hqd_active = prop->hqd_active;
|
||||
|
||||
/* set UQ fenceaddress */
|
||||
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
|
||||
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4509,11 +4583,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
if (adev->gfx.disable_kq) {
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
/* we don't want to set ring->ready */
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
if (amdgpu_async_gfx_ring)
|
||||
amdgpu_gfx_disable_kgq(adev, 0);
|
||||
} else {
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
@@ -4722,6 +4808,49 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
unsigned int irq_type;
|
||||
int m, p, r;
|
||||
|
||||
if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
|
||||
for (m = 0; m < adev->gfx.me.num_me; m++) {
|
||||
for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
|
||||
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
|
||||
if (enable)
|
||||
r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
else
|
||||
r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
|
||||
for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
|
||||
for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
|
||||
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
|
||||
+ (m * adev->gfx.mec.num_pipe_per_mec)
|
||||
+ p;
|
||||
if (enable)
|
||||
r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
else
|
||||
r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
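gfx_v11_0_set_userq_eop_interrupts() above arms (or releases) the CP end-of-pipe interrupt for every GFX ME pipe and every MEC pipe whenever user-queue functions are registered, rather than per kernel ring — presumably because user queues are created and destroyed at runtime and rely on the EOP interrupt already being enabled. The per-pipe IRQ type arithmetic it uses boils down to the following (helper name is illustrative):

        /* Illustrative condensation of the irq_type computation used above. */
        static unsigned int userq_eop_irq_type(struct amdgpu_device *adev,
                                               bool compute, int me_or_mec, int pipe)
        {
                if (!compute)
                        return AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + pipe;

                return AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
                       (me_or_mec * adev->gfx.mec.num_pipe_per_mec) + pipe;
        }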
static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
@@ -4731,9 +4860,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
|
||||
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
|
||||
gfx_v11_0_set_userq_eop_interrupts(adev, false);
|
||||
|
||||
if (!adev->no_hw_access) {
|
||||
if (amdgpu_async_gfx_ring) {
|
||||
if (amdgpu_async_gfx_ring &&
|
||||
!adev->gfx.disable_kq) {
|
||||
if (amdgpu_gfx_disable_kgq(adev, 0))
|
||||
DRM_ERROR("KGQ disable failed\n");
|
||||
}
|
||||
@@ -5059,11 +5190,36 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
switch (amdgpu_user_queue) {
case -1:
case 0:
default:
adev->gfx.disable_kq = false;
adev->gfx.disable_uq = true;
break;
case 1:
adev->gfx.disable_kq = false;
adev->gfx.disable_uq = false;
break;
case 2:
adev->gfx.disable_kq = true;
adev->gfx.disable_uq = false;
break;
}
|
||||
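The new switch above maps the amdgpu_user_queue module parameter onto two flags: -1/0 (the default) keeps kernel queues only, 1 enables user queues alongside kernel queues, and 2 disables kernel queues in favour of user queues. Combined with the ring-count adjustment a few lines further down, the policy could be condensed as follows (illustrative helper, not in the patch):

        /* Illustrative summary of the user-queue mode policy set up above. */
        static void sketch_apply_user_queue_mode(struct amdgpu_device *adev, int user_queue)
        {
                adev->gfx.disable_kq = (user_queue == 2);
                adev->gfx.disable_uq = (user_queue != 1 && user_queue != 2);

                if (adev->gfx.disable_kq) {
                        /* one GFX ring is still needed to program the clear state */
                        adev->gfx.num_gfx_rings = 1;
                        adev->gfx.num_compute_rings = 0;
                } else {
                        adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
                        adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
                                                          AMDGPU_MAX_COMPUTE_RINGS);
                }
        }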
adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
|
||||
|
||||
adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
|
||||
AMDGPU_MAX_COMPUTE_RINGS);
|
||||
if (adev->gfx.disable_kq) {
|
||||
/* We need one GFX ring temporarily to set up
|
||||
* the clear state.
|
||||
*/
|
||||
adev->gfx.num_gfx_rings = 1;
|
||||
adev->gfx.num_compute_rings = 0;
|
||||
} else {
|
||||
adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
|
||||
AMDGPU_MAX_COMPUTE_RINGS);
|
||||
}
|
||||
|
||||
gfx_v11_0_set_kiq_pm4_funcs(adev);
|
||||
gfx_v11_0_set_ring_funcs(adev);
|
||||
@@ -5094,6 +5250,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
|
||||
r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5691,10 +5852,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
|
||||
}
|
||||
|
||||
if (ring->is_mes_queue)
|
||||
/* inherit vmid from mqd */
|
||||
control |= 0x400000;
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
|
||||
amdgpu_ring_write(ring,
|
||||
@@ -5714,10 +5871,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
if (ring->is_mes_queue)
|
||||
/* inherit vmid from mqd */
|
||||
control |= 0x40000000;
|
||||
|
||||
/* Currently, there is a high possibility to get wave ID mismatch
|
||||
* between ME and GDS, leading to a hw deadlock, because ME generates
|
||||
* different wave IDs than the GDS expects. This situation happens
|
||||
@@ -5775,8 +5928,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
amdgpu_ring_write(ring, upper_32_bits(addr));
|
||||
amdgpu_ring_write(ring, lower_32_bits(seq));
|
||||
amdgpu_ring_write(ring, upper_32_bits(seq));
|
||||
amdgpu_ring_write(ring, ring->is_mes_queue ?
|
||||
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
|
||||
amdgpu_ring_write(ring, 0);
|
||||
}
|
||||
|
||||
static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
|
||||
@@ -5804,10 +5956,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
|
||||
static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
{
|
||||
if (ring->is_mes_queue)
|
||||
gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
|
||||
else
|
||||
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
|
||||
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
|
||||
|
||||
/* compute doesn't have PFP */
|
||||
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
|
||||
@@ -6036,28 +6185,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
|
||||
void *de_payload_cpu_addr;
|
||||
int cnt;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v10_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
de_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
offset = offsetof(struct v10_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gds_backup) +
|
||||
offsetof(struct v10_gfx_meta_data, de_payload);
|
||||
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
} else {
|
||||
offset = offsetof(struct v10_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
|
||||
AMDGPU_CSA_SIZE - adev->gds.gds_size,
|
||||
PAGE_SIZE);
|
||||
}
|
||||
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
|
||||
AMDGPU_CSA_SIZE - adev->gds.gds_size,
|
||||
PAGE_SIZE);
|
||||
|
||||
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
|
||||
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
|
||||
@@ -6296,25 +6430,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
int i;
|
||||
u32 doorbell_offset = entry->src_data[0];
|
||||
u8 me_id, pipe_id, queue_id;
|
||||
struct amdgpu_ring *ring;
|
||||
uint32_t mes_queue_id = entry->src_data[0];
|
||||
int i;
|
||||
|
||||
DRM_DEBUG("IH: CP EOP\n");
|
||||
|
||||
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
struct amdgpu_mes_queue *queue;
if (adev->enable_mes && doorbell_offset) {
struct amdgpu_userq_fence_driver *fence_drv = NULL;
struct xarray *xa = &adev->userq_xa;
unsigned long flags;

mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;

spin_lock(&adev->mes.queue_id_lock);
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
if (queue) {
DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
amdgpu_fence_process(queue->ring);
}
spin_unlock(&adev->mes.queue_id_lock);
xa_lock_irqsave(xa, flags);
fence_drv = xa_load(xa, doorbell_offset);
if (fence_drv)
amdgpu_userq_fence_driver_process(fence_drv);
xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
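In the EOP handler above, an interrupt that carries a doorbell offset in src_data[0] is now treated as a user-queue completion: the offset is used as the key into adev->userq_xa, and the fence driver found there gets its fences processed, replacing the old MES queue-ID/IDR lookup. That lookup only works if the fence driver was published under the doorbell offset beforehand; a hypothetical registration counterpart (not part of this hunk) would be:

        /* Hypothetical producer side for the xa_load() lookup above. */
        static int sketch_userq_fence_drv_publish(struct amdgpu_device *adev,
                                                  u32 doorbell_offset,
                                                  struct amdgpu_userq_fence_driver *fence_drv)
        {
                /* store under the doorbell offset so the EOP IRQ can find it */
                return xa_err(xa_store_irq(&adev->userq_xa, doorbell_offset,
                                           fence_drv, GFP_KERNEL));
        }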
@@ -6481,27 +6613,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
queue_id = (entry->ring_id & 0x70) >> 4;
|
||||
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
if (!adev->gfx.disable_kq) {
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6609,6 +6743,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
|
||||
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
|
||||
}
|
||||
|
||||
static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
|
||||
{
|
||||
/* Disable the pipe reset until the CPFW fully support it.*/
|
||||
dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reset_pipe = 0, clean_pipe = 0;
|
||||
int r;
|
||||
|
||||
if (!gfx_v11_pipe_reset_support(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
gfx_v11_0_set_safe_mode(adev, 0);
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE0_RESET, 1);
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
ME_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE0_RESET, 0);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
ME_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE1_RESET, 1);
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
ME_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE1_RESET, 0);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
ME_PIPE1_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
|
||||
|
||||
r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
|
||||
RS64_FW_UC_START_ADDR_LO;
|
||||
soc21_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
gfx_v11_0_unset_safe_mode(adev, 0);
|
||||
|
||||
dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
|
||||
r == 0 ? "successfully" : "failed");
|
||||
/* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
|
||||
* so the pipe reset status relies on the later gfx ring test result.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
@@ -6618,8 +6815,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return -EINVAL;

r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
if (r)
return r;
if (r) {

dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
r = gfx_v11_reset_gfx_pipe(ring);
if (r)
return r;
}

r = gfx_v11_0_kgq_init_queue(ring, true);
if (r) {
@@ -6636,6 +6838,136 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
|
||||
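gfx_v11_0_reset_kgq() above no longer gives up when amdgpu_mes_reset_legacy_queue() fails; it warns and falls back to a per-pipe reset via gfx_v11_reset_gfx_pipe() (gfx_v11_0_reset_kcq() later does the same with gfx_v11_0_reset_compute_pipe()), even though the pipe reset is currently a no-op because gfx_v11_pipe_reset_support() still returns false pending CP firmware support. The shared shape of that fallback, condensed for illustration:

        /* Illustrative condensation of the MES-then-pipe-reset fallback. */
        static int sketch_reset_queue_with_fallback(struct amdgpu_ring *ring,
                                                    unsigned int vmid, bool is_kcq)
        {
                int r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, is_kcq);

                if (r) {
                        dev_warn(ring->adev->dev,
                                 "reset via MES failed (%d), trying pipe reset\n", r);
                        r = is_kcq ? gfx_v11_0_reset_compute_pipe(ring) :
                                     gfx_v11_reset_gfx_pipe(ring);
                }

                return r;
        }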
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
|
||||
{
|
||||
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reset_pipe = 0, clean_pipe = 0;
|
||||
int r;
|
||||
|
||||
if (!gfx_v11_pipe_reset_support(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
gfx_v11_0_set_safe_mode(adev, 0);
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
|
||||
reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
|
||||
clean_pipe = reset_pipe;
|
||||
|
||||
if (adev->gfx.rs64_enable) {
|
||||
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE1_RESET, 0);
|
||||
break;
|
||||
case 2:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE2_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE2_RESET, 0);
|
||||
break;
|
||||
case 3:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE3_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE3_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
|
||||
r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
|
||||
RS64_FW_UC_START_ADDR_LO;
|
||||
} else {
|
||||
if (ring->me == 1) {
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE1_RESET, 0);
|
||||
break;
|
||||
case 2:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE2_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE2_RESET, 0);
|
||||
break;
|
||||
case 3:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE3_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE3_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
|
||||
} else {
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE1_RESET, 0);
|
||||
break;
|
||||
case 2:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE2_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE2_RESET, 0);
|
||||
break;
|
||||
case 3:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE3_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME2_PIPE3_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
|
||||
}
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
|
||||
r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
|
||||
}
|
||||
|
||||
soc21_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
gfx_v11_0_unset_safe_mode(adev, 0);
|
||||
|
||||
dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
|
||||
r == 0 ? "successfully" : "failed");
|
||||
/*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
|
||||
* reset status relies on the compute ring test result.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
@@ -6646,8 +6978,10 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
|
||||
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "reset via MMIO failed %d\n", r);
|
||||
return r;
|
||||
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
|
||||
r = gfx_v11_0_reset_compute_pipe(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v11_0_kcq_init_queue(ring, true);
|
||||
@@ -6693,9 +7027,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe
|
||||
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
|
||||
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_11[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
"regCP_MEC_ME2_HEADER_DUMP",
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
else
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_11[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
}
|
||||
index += reg_count;
|
||||
}
|
||||
@@ -6755,9 +7094,16 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
/* ME0 is for GFX so start from 1 for CP */
|
||||
soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_11[reg]));
|
||||
if (i &&
|
||||
gc_cp_reg_list_11[reg].reg_offset ==
|
||||
regCP_MEC_ME1_HEADER_DUMP)
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_OFFSET(GC, 0,
|
||||
regCP_MEC_ME2_HEADER_DUMP));
|
||||
else
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_11[reg]));
|
||||
}
|
||||
index += reg_count;
|
||||
}
|
||||
|
||||
@@ -44,6 +44,8 @@
|
||||
#include "gfx_v12_0.h"
|
||||
#include "nbif_v6_3_1.h"
|
||||
#include "mes_v12_0.h"
|
||||
#include "mes_userqueue.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
|
||||
#define GFX12_NUM_GFX_RINGS 1
|
||||
#define GFX12_MEC_HPD_SIZE 2048
|
||||
@@ -133,11 +135,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
|
||||
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
|
||||
/* SE status registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
|
||||
@@ -186,7 +191,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
};
|
||||
|
||||
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
@@ -215,7 +229,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
/* cp header registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
@@ -475,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)

memset(&ib, 0, sizeof(ib));

if (ring->is_mes_queue) {
uint32_t padding, offset;
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;

offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
padding = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
cpu_ptr = &adev->wb.wb[index];

ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;

gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
cpu_ptr = &adev->wb.wb[index];

r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}

ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -528,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
if (!ring->is_mes_queue)
amdgpu_ib_free(&ib, NULL);
amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
amdgpu_device_wb_free(adev, index);
return r;
}

@@ -881,6 +895,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
soc24_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE 73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256

static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
}

static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info,
bool skip_check)
{
if (adev->gfx.cp_gfx_shadow || skip_check) {
gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
return 0;
}

memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
return -EINVAL;
}

static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v12_0_select_se_sh,
@@ -889,6 +931,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
.get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
};

static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
@@ -1346,6 +1389,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
unsigned num_compute_rings;
int xcc_id = 0;
struct amdgpu_device *adev = ip_block->adev;
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */

INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);

@@ -1354,7 +1398,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 2;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -1369,6 +1413,22 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}

switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (!adev->gfx.disable_uq &&
adev->gfx.me_fw_version >= 2780 &&
adev->gfx.pfp_fw_version >= 2840 &&
adev->gfx.mec_fw_version >= 3050 &&
adev->mes.fw_version[0] >= 123) {
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
}
break;
default:
break;
}

switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
@@ -1383,11 +1443,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}

/* recalculate compute rings to use based on hardware configuration */
num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe) / 2;
adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
num_compute_rings);
if (adev->gfx.num_compute_rings) {
/* recalculate compute rings to use based on hardware configuration */
num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe) / 2;
adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
num_compute_rings);
}

/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
@@ -1433,37 +1495,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
}

/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
if (adev->gfx.num_gfx_rings) {
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;

r = gfx_v12_0_gfx_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;
ring_id++;
r = gfx_v12_0_gfx_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;
ring_id++;
}
}
}
}

ring_id = 0;
/* set up the compute queues - allocate horizontally across pipes */
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
if (!amdgpu_gfx_is_mec_queue_enabled(adev,
0, i, k, j))
continue;
if (adev->gfx.num_compute_rings) {
ring_id = 0;
/* set up the compute queues - allocate horizontally across pipes */
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
if (!amdgpu_gfx_is_mec_queue_enabled(adev,
0, i, k, j))
continue;

r = gfx_v12_0_compute_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;
r = gfx_v12_0_compute_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;

ring_id++;
ring_id++;
}
}
}
}
@@ -2948,6 +3014,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
#ifdef __BIG_ENDIAN
|
||||
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
|
||||
#endif
|
||||
if (prop->tmz_queue)
|
||||
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
|
||||
mqd->cp_gfx_hqd_cntl = tmp;
|
||||
|
||||
/* set up cp_doorbell_control */
|
||||
@@ -2968,6 +3036,14 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
/* active the queue */
|
||||
mqd->cp_gfx_hqd_active = 1;
|
||||
|
||||
/* set gfx UQ items */
|
||||
mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
|
||||
mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
|
||||
mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
|
||||
mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
|
||||
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
|
||||
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3091,6 +3167,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
|
||||
if (prop->tmz_queue)
|
||||
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
|
||||
mqd->cp_hqd_pq_control = tmp;
|
||||
|
||||
/* set the wb address whether it's enabled or not */
|
||||
@@ -3142,6 +3220,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
|
||||
|
||||
mqd->cp_hqd_active = prop->hqd_active;
|
||||
|
||||
/* set UQ fenceaddress */
|
||||
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
|
||||
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3600,6 +3682,49 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
unsigned int irq_type;
|
||||
int m, p, r;
|
||||
|
||||
if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
|
||||
for (m = 0; m < adev->gfx.me.num_me; m++) {
|
||||
for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
|
||||
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
|
||||
if (enable)
|
||||
r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
else
|
||||
r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
|
||||
for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
|
||||
for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
|
||||
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
|
||||
+ (m * adev->gfx.mec.num_pipe_per_mec)
|
||||
+ p;
|
||||
if (enable)
|
||||
r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
else
|
||||
r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
|
||||
irq_type);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
@@ -3610,6 +3735,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
|
||||
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
|
||||
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
|
||||
gfx_v12_0_set_userq_eop_interrupts(adev, false);
|
||||
|
||||
if (!adev->no_hw_access) {
|
||||
if (amdgpu_async_gfx_ring) {
|
||||
@@ -3698,11 +3824,33 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
switch (amdgpu_user_queue) {
|
||||
case -1:
|
||||
case 0:
|
||||
default:
|
||||
adev->gfx.disable_kq = false;
|
||||
adev->gfx.disable_uq = true;
|
||||
break;
|
||||
case 1:
|
||||
adev->gfx.disable_kq = false;
|
||||
adev->gfx.disable_uq = false;
|
||||
break;
|
||||
case 2:
|
||||
adev->gfx.disable_kq = true;
|
||||
adev->gfx.disable_uq = false;
|
||||
break;
|
||||
}
|
||||
|
||||
adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
|
||||
|
||||
adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
|
||||
AMDGPU_MAX_COMPUTE_RINGS);
|
||||
if (adev->gfx.disable_kq) {
|
||||
adev->gfx.num_gfx_rings = 0;
|
||||
adev->gfx.num_compute_rings = 0;
|
||||
} else {
|
||||
adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
|
||||
AMDGPU_MAX_COMPUTE_RINGS);
|
||||
}
|
||||
|
||||
gfx_v12_0_set_kiq_pm4_funcs(adev);
|
||||
gfx_v12_0_set_ring_funcs(adev);
|
||||
@@ -3733,6 +3881,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = gfx_v12_0_set_userq_eop_interrupts(adev, true);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4172,45 +4324,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
|
||||
static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t *wptr_saved;
|
||||
uint32_t *is_queue_unmap;
|
||||
uint64_t aggregated_db_index;
|
||||
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
|
||||
uint64_t wptr_tmp;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
|
||||
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
|
||||
sizeof(uint32_t));
|
||||
aggregated_db_index =
|
||||
amdgpu_mes_get_aggregated_doorbell_index(adev,
|
||||
ring->hw_prio);
|
||||
|
||||
wptr_tmp = ring->wptr & ring->buf_mask;
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
|
||||
*wptr_saved = wptr_tmp;
|
||||
/* assume doorbell always being used by mes mapped queue */
|
||||
if (*is_queue_unmap) {
|
||||
WDOORBELL64(aggregated_db_index, wptr_tmp);
|
||||
WDOORBELL64(ring->doorbell_index, wptr_tmp);
|
||||
} else {
|
||||
WDOORBELL64(ring->doorbell_index, wptr_tmp);
|
||||
|
||||
if (*is_queue_unmap)
|
||||
WDOORBELL64(aggregated_db_index, wptr_tmp);
|
||||
}
|
||||
if (ring->use_doorbell) {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
|
||||
ring->wptr);
|
||||
WDOORBELL64(ring->doorbell_index, ring->wptr);
|
||||
} else {
|
||||
if (ring->use_doorbell) {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
|
||||
ring->wptr);
|
||||
WDOORBELL64(ring->doorbell_index, ring->wptr);
|
||||
} else {
|
||||
WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
|
||||
lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
|
||||
upper_32_bits(ring->wptr));
|
||||
}
|
||||
WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
|
||||
lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
|
||||
upper_32_bits(ring->wptr));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4235,42 +4359,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
|
||||
static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t *wptr_saved;
|
||||
uint32_t *is_queue_unmap;
|
||||
uint64_t aggregated_db_index;
|
||||
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
|
||||
uint64_t wptr_tmp;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
|
||||
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
|
||||
sizeof(uint32_t));
|
||||
aggregated_db_index =
|
||||
amdgpu_mes_get_aggregated_doorbell_index(adev,
|
||||
ring->hw_prio);
|
||||
|
||||
wptr_tmp = ring->wptr & ring->buf_mask;
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
|
||||
*wptr_saved = wptr_tmp;
|
||||
/* assume doorbell always used by mes mapped queue */
|
||||
if (*is_queue_unmap) {
|
||||
WDOORBELL64(aggregated_db_index, wptr_tmp);
|
||||
WDOORBELL64(ring->doorbell_index, wptr_tmp);
|
||||
} else {
|
||||
WDOORBELL64(ring->doorbell_index, wptr_tmp);
|
||||
|
||||
if (*is_queue_unmap)
|
||||
WDOORBELL64(aggregated_db_index, wptr_tmp);
|
||||
}
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
if (ring->use_doorbell) {
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
|
||||
ring->wptr);
|
||||
WDOORBELL64(ring->doorbell_index, ring->wptr);
|
||||
} else {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
if (ring->use_doorbell) {
|
||||
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
|
||||
ring->wptr);
|
||||
WDOORBELL64(ring->doorbell_index, ring->wptr);
|
||||
} else {
|
||||
BUG(); /* only DOORBELL method supported on gfx12 now */
|
||||
}
|
||||
BUG(); /* only DOORBELL method supported on gfx12 now */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4317,10 +4413,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
||||
|
||||
control |= ib->length_dw | (vmid << 24);
|
||||
|
||||
if (ring->is_mes_queue)
|
||||
/* inherit vmid from mqd */
|
||||
control |= 0x400000;
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
|
||||
amdgpu_ring_write(ring,
|
||||
@@ -4340,10 +4432,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
|
||||
|
||||
if (ring->is_mes_queue)
|
||||
/* inherit vmid from mqd */
|
||||
control |= 0x40000000;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
|
||||
amdgpu_ring_write(ring,
|
||||
@@ -4383,8 +4471,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
||||
amdgpu_ring_write(ring, upper_32_bits(addr));
|
||||
amdgpu_ring_write(ring, lower_32_bits(seq));
|
||||
amdgpu_ring_write(ring, upper_32_bits(seq));
|
||||
amdgpu_ring_write(ring, ring->is_mes_queue ?
|
||||
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
|
||||
amdgpu_ring_write(ring, 0);
|
||||
}
|
||||
|
||||
static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
|
||||
@@ -4412,10 +4499,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
|
||||
static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
|
||||
unsigned vmid, uint64_t pd_addr)
|
||||
{
|
||||
if (ring->is_mes_queue)
|
||||
gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
|
||||
else
|
||||
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
|
||||
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
|
||||
|
||||
/* compute doesn't have PFP */
|
||||
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
|
||||
@@ -4749,25 +4833,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
int i;
|
||||
u32 doorbell_offset = entry->src_data[0];
|
||||
u8 me_id, pipe_id, queue_id;
|
||||
struct amdgpu_ring *ring;
|
||||
uint32_t mes_queue_id = entry->src_data[0];
|
||||
int i;
|
||||
|
||||
DRM_DEBUG("IH: CP EOP\n");
|
||||
|
||||
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
|
||||
struct amdgpu_mes_queue *queue;
|
||||
if (adev->enable_mes && doorbell_offset) {
|
||||
struct amdgpu_userq_fence_driver *fence_drv = NULL;
|
||||
struct xarray *xa = &adev->userq_xa;
|
||||
unsigned long flags;
|
||||
|
||||
mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
|
||||
|
||||
spin_lock(&adev->mes.queue_id_lock);
|
||||
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
|
||||
if (queue) {
|
||||
DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
|
||||
amdgpu_fence_process(queue->ring);
|
||||
}
|
||||
spin_unlock(&adev->mes.queue_id_lock);
|
||||
xa_lock_irqsave(xa, flags);
|
||||
fence_drv = xa_load(xa, doorbell_offset);
|
||||
if (fence_drv)
|
||||
amdgpu_userq_fence_driver_process(fence_drv);
|
||||
xa_unlock_irqrestore(xa, flags);
|
||||
} else {
|
||||
me_id = (entry->ring_id & 0x0c) >> 2;
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
@@ -4934,27 +5016,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
|
||||
pipe_id = (entry->ring_id & 0x03) >> 0;
|
||||
queue_id = (entry->ring_id & 0x70) >> 4;
|
||||
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
if (!adev->gfx.disable_kq) {
|
||||
switch (me_id) {
|
||||
case 0:
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
|
||||
ring = &adev->gfx.gfx_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
|
||||
ring = &adev->gfx.compute_ring[i];
|
||||
if (ring->me == me_id && ring->pipe == pipe_id &&
|
||||
ring->queue == queue_id)
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5160,6 +5244,69 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
amdgpu_gfx_off_ctrl(adev, true);
|
||||
}
|
||||
|
||||
static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
|
||||
{
|
||||
/* Disable the pipe reset until the CPFW fully support it.*/
|
||||
dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reset_pipe = 0, clean_pipe = 0;
|
||||
int r;
|
||||
|
||||
if (!gfx_v12_pipe_reset_support(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
gfx_v12_0_set_safe_mode(adev, 0);
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE0_RESET, 1);
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
ME_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE0_RESET, 0);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
ME_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE1_RESET, 1);
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
|
||||
ME_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
PFP_PIPE1_RESET, 0);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
|
||||
ME_PIPE1_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
|
||||
|
||||
r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
|
||||
RS64_FW_UC_START_ADDR_LO;
|
||||
soc24_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
gfx_v12_0_unset_safe_mode(adev, 0);
|
||||
|
||||
dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
|
||||
r == 0 ? "successfully" : "failed");
|
||||
/* Sometimes the ME start pc counter can't cache correctly, so the
|
||||
* PC check only as a reference and pipe reset result rely on the
|
||||
* later ring test.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
@@ -5170,8 +5317,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
|
||||
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "reset via MES failed %d\n", r);
|
||||
return r;
|
||||
dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
|
||||
r = gfx_v12_reset_gfx_pipe(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v12_0_kgq_init_queue(ring, true);
|
||||
@@ -5189,6 +5338,89 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
return amdgpu_ring_test_ring(ring);
|
||||
}
|
||||
|
||||
static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reset_pipe = 0, clean_pipe = 0;
|
||||
int r = 0;
|
||||
|
||||
if (!gfx_v12_pipe_reset_support(adev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
gfx_v12_0_set_safe_mode(adev, 0);
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
|
||||
reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
|
||||
clean_pipe = reset_pipe;
|
||||
|
||||
if (adev->gfx.rs64_enable) {
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE1_RESET, 0);
|
||||
break;
|
||||
case 2:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE2_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE2_RESET, 0);
|
||||
break;
|
||||
case 3:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE3_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
|
||||
MEC_PIPE3_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
|
||||
r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
|
||||
RS64_FW_UC_START_ADDR_LO;
|
||||
} else {
|
||||
switch (ring->pipe) {
|
||||
case 0:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE0_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE0_RESET, 0);
|
||||
break;
|
||||
case 1:
|
||||
reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE1_RESET, 1);
|
||||
clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
|
||||
MEC_ME1_PIPE1_RESET, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
|
||||
WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
|
||||
/* Doesn't find the F32 MEC instruction pointer register, and suppose
|
||||
* the driver won't run into the F32 mode.
|
||||
*/
|
||||
}
|
||||
|
||||
soc24_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
gfx_v12_0_unset_safe_mode(adev, 0);
|
||||
|
||||
dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
|
||||
r == 0 ? "successfully" : "failed");
|
||||
/* Need the ring test to verify the pipe reset result.*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
@@ -5199,8 +5431,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
|
||||
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "reset via MMIO failed %d\n", r);
|
||||
return r;
|
||||
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
|
||||
r = gfx_v12_0_reset_compute_pipe(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v12_0_kcq_init_queue(ring, true);
|
||||
|
||||
@@ -53,6 +53,9 @@
|
||||
#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
|
||||
#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
|
||||
|
||||
#define GFX6_NUM_GFX_RINGS 1
|
||||
#define GFX6_NUM_COMPUTE_RINGS 2
|
||||
|
||||
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
|
||||
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
|
||||
@@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
|
||||
gfx_v6_0_get_cu_info(adev);
|
||||
gfx_v6_0_config_init(adev);
|
||||
|
||||
WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
|
||||
(0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
|
||||
WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
|
||||
(0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
|
||||
WREG32(mmCP_QUEUE_THRESHOLDS,
|
||||
((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
|
||||
(0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
|
||||
|
||||
/* set HW defaults for 3D engine */
|
||||
WREG32(mmCP_MEQ_THRESHOLDS,
|
||||
(0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
|
||||
(0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
|
||||
|
||||
sx_debug_1 = RREG32(mmSX_DEBUG_1);
|
||||
WREG32(mmSX_DEBUG_1, sx_debug_1);
|
||||
@@ -2851,44 +2858,21 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
|
||||
static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
return;
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] =
|
||||
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
count = amdgpu_gfx_csb_preamble_start(buffer);
|
||||
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
amdgpu_gfx_csb_preamble_end(buffer, count);
|
||||
}
|
||||
|
||||
static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
|
||||
|
||||
@@ -55,6 +55,9 @@
|
||||
#define GFX7_NUM_GFX_RINGS 1
|
||||
#define GFX7_MEC_HPD_SIZE 2048
|
||||
|
||||
#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
|
||||
#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
|
||||
|
||||
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
|
||||
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
|
||||
@@ -3882,67 +3885,22 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
|
||||
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
return;
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] =
|
||||
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
count = amdgpu_gfx_csb_preamble_start(buffer);
|
||||
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
|
||||
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_BONAIRE:
|
||||
buffer[count++] = cpu_to_le32(0x16000012);
|
||||
buffer[count++] = cpu_to_le32(0x00000000);
|
||||
break;
|
||||
case CHIP_KAVERI:
|
||||
buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
|
||||
buffer[count++] = cpu_to_le32(0x00000000);
|
||||
break;
|
||||
case CHIP_KABINI:
|
||||
case CHIP_MULLINS:
|
||||
buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
|
||||
buffer[count++] = cpu_to_le32(0x00000000);
|
||||
break;
|
||||
case CHIP_HAWAII:
|
||||
buffer[count++] = cpu_to_le32(0x3a00161a);
|
||||
buffer[count++] = cpu_to_le32(0x0000002e);
|
||||
break;
|
||||
default:
|
||||
buffer[count++] = cpu_to_le32(0x00000000);
|
||||
buffer[count++] = cpu_to_le32(0x00000000);
|
||||
break;
|
||||
}
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
amdgpu_gfx_csb_preamble_end(buffer, count);
|
||||
}
|
||||
|
||||
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
|
||||
|
||||
@@ -1223,48 +1223,22 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
|
||||
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
return;
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] =
|
||||
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index -
|
||||
PACKET3_SET_CONTEXT_REG_START);
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
count = amdgpu_gfx_csb_preamble_start(buffer);
|
||||
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
|
||||
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
|
||||
PACKET3_SET_CONTEXT_REG_START);
|
||||
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
|
||||
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
amdgpu_gfx_csb_preamble_end(buffer, count);
|
||||
}
|
||||
|
||||
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
|
||||
|
||||
@@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
/* SE status registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
|
||||
/* packet headers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
|
||||
};
|
||||
|
||||
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
|
||||
@@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
|
||||
};
|
||||
|
||||
enum ta_ras_gfx_subblock {
|
||||
@@ -1624,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
|
||||
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
|
||||
volatile u32 *buffer)
|
||||
{
|
||||
u32 count = 0, i;
|
||||
const struct cs_section_def *sect = NULL;
|
||||
const struct cs_extent_def *ext = NULL;
|
||||
u32 count = 0;
|
||||
|
||||
if (adev->gfx.rlc.cs_data == NULL)
|
||||
return;
|
||||
if (buffer == NULL)
|
||||
return;
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
buffer[count++] = cpu_to_le32(0x80000000);
|
||||
|
||||
for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
|
||||
for (ext = sect->section; ext->extent != NULL; ++ext) {
|
||||
if (sect->id == SECT_CONTEXT) {
|
||||
buffer[count++] =
|
||||
cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
|
||||
buffer[count++] = cpu_to_le32(ext->reg_index -
|
||||
PACKET3_SET_CONTEXT_REG_START);
|
||||
for (i = 0; i < ext->reg_count; i++)
|
||||
buffer[count++] = cpu_to_le32(ext->extent[i]);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
|
||||
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
|
||||
|
||||
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
buffer[count++] = cpu_to_le32(0);
|
||||
count = amdgpu_gfx_csb_preamble_start(buffer);
|
||||
count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
|
||||
amdgpu_gfx_csb_preamble_end(buffer, count);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
|
||||
@@ -5441,16 +5442,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
|
||||
|
||||
payload_size = sizeof(struct v9_ce_ib_state);
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
|
||||
} else {
|
||||
payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
|
||||
}
|
||||
payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
|
||||
|
||||
if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
|
||||
memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
|
||||
@@ -5473,16 +5466,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
|
||||
|
||||
payload_size = sizeof(struct v9_de_ib_state);
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
|
||||
} else {
|
||||
payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
|
||||
}
|
||||
payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
|
||||
|
||||
((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
|
||||
IB_COMPLETION_STATUS_PREEMPTED;
|
||||
@@ -5672,19 +5657,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
|
||||
|
||||
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_gpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
ce_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
} else {
|
||||
offset = offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
}
|
||||
offset = offsetof(struct v9_gfx_meta_data, ce_payload);
|
||||
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
|
||||
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
|
||||
@@ -5770,28 +5745,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo
|
||||
void *de_payload_cpu_addr;
|
||||
int cnt;
|
||||
|
||||
if (ring->is_mes_queue) {
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gfx_meta_data) +
|
||||
offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
de_payload_cpu_addr =
|
||||
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
|
||||
offset = offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
|
||||
gfx[0].gds_backup) +
|
||||
offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
|
||||
} else {
|
||||
offset = offsetof(struct v9_gfx_meta_data, de_payload);
|
||||
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
|
||||
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
|
||||
|
||||
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
|
||||
AMDGPU_CSA_SIZE - adev->gds.gds_size,
|
||||
PAGE_SIZE);
|
||||
}
|
||||
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
|
||||
AMDGPU_CSA_SIZE - adev->gds.gds_size,
|
||||
PAGE_SIZE);
|
||||
|
||||
if (usegds) {
|
||||
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
|
||||
@@ -7339,9 +7299,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer
|
||||
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
|
||||
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_9[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
"mmCP_MEC_ME2_HEADER_DUMP",
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
else
|
||||
drm_printf(p, "%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_9[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues[index + reg]);
|
||||
}
|
||||
index += reg_count;
|
||||
}
|
||||
@@ -7378,9 +7343,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
|
||||
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_9[reg]));
|
||||
if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
|
||||
else
|
||||
adev->gfx.ip_dump_compute_queues[index + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(
|
||||
gc_cp_reg_list_9[reg]));
|
||||
}
|
||||
index += reg_count;
|
||||
}
|
||||
@@ -7394,8 +7363,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
/* Emit the cleaner shader */
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
|
||||
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
|
||||
else
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0));
|
||||
|
||||
amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
|
||||
}
|
||||
|
||||
|
||||
@@ -1547,7 +1547,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
|
||||
{
|
||||
uint32_t bank, way, mem;
|
||||
static const char * const vml2_way_str[] = { "BIGK", "4K" };
|
||||
static const char * const utcl2_rounter_str[] = { "VMC", "APT" };
|
||||
static const char * const utcl2_router_str[] = { "VMC", "APT" };
|
||||
|
||||
mem = instance % blk->num_mem_blocks;
|
||||
way = (instance / blk->num_mem_blocks) % blk->num_ways;
|
||||
@@ -1568,7 +1568,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
|
||||
dev_info(
|
||||
adev->dev,
|
||||
"GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
|
||||
bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt);
|
||||
bank, utcl2_router_str[mem], sec_cnt, ded_cnt);
|
||||
break;
|
||||
case ATC_L2_CACHE_2M:
|
||||
dev_info(
|
||||
|
||||
@@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6),
|
||||
/* cp header registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP),
|
||||
/* SE status registers */
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
|
||||
@@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = {
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
|
||||
@@ -4558,12 +4563,21 @@ static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_pri
|
||||
"\nxcc:%d mec:%d, pipe:%d, queue:%d\n",
|
||||
xcc_id, i, j, k);
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
drm_printf(p,
|
||||
"%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_9_4_3[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset + inst_offset +
|
||||
reg]);
|
||||
if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
|
||||
regCP_MEC_ME1_HEADER_DUMP)
|
||||
drm_printf(p,
|
||||
"%-50s \t 0x%08x\n",
|
||||
"regCP_MEC_ME2_HEADER_DUMP",
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset + inst_offset +
|
||||
reg]);
|
||||
else
|
||||
drm_printf(p,
|
||||
"%-50s \t 0x%08x\n",
|
||||
gc_cp_reg_list_9_4_3[reg].reg_name,
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset + inst_offset +
|
||||
reg]);
|
||||
}
|
||||
inst_offset += reg_count;
|
||||
}
|
||||
@@ -4612,12 +4626,20 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
|
||||
GET_INST(GC, xcc_id));
|
||||
|
||||
for (reg = 0; reg < reg_count; reg++) {
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset +
|
||||
inst_offset + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET_INST(
|
||||
gc_cp_reg_list_9_4_3[reg],
|
||||
GET_INST(GC, xcc_id)));
|
||||
if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
|
||||
regCP_MEC_ME1_HEADER_DUMP)
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset +
|
||||
inst_offset + reg] =
|
||||
RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
|
||||
regCP_MEC_ME2_HEADER_DUMP));
|
||||
else
|
||||
adev->gfx.ip_dump_compute_queues
|
||||
[xcc_offset +
|
||||
inst_offset + reg] =
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET_INST(
|
||||
gc_cp_reg_list_9_4_3[reg],
|
||||
GET_INST(GC, xcc_id)));
|
||||
}
|
||||
inst_offset += reg_count;
|
||||
}
|
||||
|
||||
@@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reg;
|
||||
|
||||
/* MES fw manages IH_VMID_x_LUT updating */
|
||||
if (ring->is_mes_queue)
|
||||
return;
|
||||
|
||||
if (ring->vm_hub == AMDGPU_GFXHUB(0))
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
|
||||
else
|
||||
|
||||
@@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reg;
|
||||
|
||||
/* MES fw manages IH_VMID_x_LUT updating */
|
||||
if (ring->is_mes_queue)
|
||||
return;
|
||||
|
||||
if (ring->vm_hub == AMDGPU_GFXHUB(0))
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
|
||||
else
|
||||
@@ -832,7 +828,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
* amdgpu graphics/compute will use VMIDs 1-7
|
||||
* amdkfd will use VMIDs 8-15
|
||||
*/
|
||||
adev->vm_manager.first_kfd_vmid = 8;
|
||||
adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
|
||||
|
||||
amdgpu_vm_manager_init(adev);
|
||||
|
||||
|
||||
@@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
uint32_t reg;
|
||||
|
||||
/* MES fw manages IH_VMID_x_LUT updating */
|
||||
if (ring->is_mes_queue)
|
||||
return;
|
||||
|
||||
if (ring->vm_hub == AMDGPU_GFXHUB(0))
|
||||
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
|
||||
else
|
||||
@@ -820,7 +816,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
* amdgpu graphics/compute will use VMIDs 1-7
|
||||
* amdkfd will use VMIDs 8-15
|
||||
*/
|
||||
adev->vm_manager.first_kfd_vmid = 8;
|
||||
adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
|
||||
|
||||
amdgpu_vm_manager_init(adev);
|
||||
|
||||
|
||||
@@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
|
||||
|
||||
/* disable VGA render */
|
||||
tmp = RREG32(mmVGA_RENDER_CONTROL);
|
||||
tmp &= ~VGA_VSTATUS_CNTL;
|
||||
tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK;
|
||||
WREG32(mmVGA_RENDER_CONTROL, tmp);
|
||||
}
|
||||
/* Update configuration */
|
||||
@@ -627,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
|
||||
"write" : "read", block, mc_client, mc_id);
|
||||
}
|
||||
|
||||
/*
|
||||
static const u32 mc_cg_registers[] = {
|
||||
MC_HUB_MISC_HUB_CG,
|
||||
MC_HUB_MISC_SIP_CG,
|
||||
MC_HUB_MISC_VM_CG,
|
||||
MC_XPB_CLK_GAT,
|
||||
ATC_MISC_CG,
|
||||
MC_CITF_MISC_WR_CG,
|
||||
MC_CITF_MISC_RD_CG,
|
||||
MC_CITF_MISC_VM_CG,
|
||||
VM_L2_CG,
|
||||
mmMC_HUB_MISC_HUB_CG,
|
||||
mmMC_HUB_MISC_SIP_CG,
|
||||
mmMC_HUB_MISC_VM_CG,
|
||||
mmMC_XPB_CLK_GAT,
|
||||
mmATC_MISC_CG,
|
||||
mmMC_CITF_MISC_WR_CG,
|
||||
mmMC_CITF_MISC_RD_CG,
|
||||
mmMC_CITF_MISC_VM_CG,
|
||||
mmVM_L2_CG,
|
||||
};
|
||||
|
||||
static const u32 mc_cg_ls_en[] = {
|
||||
@@ -672,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
|
||||
orig = data = RREG32(mc_cg_registers[i]);
|
||||
if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
|
||||
data |= mc_cg_ls_en[i];
|
||||
else
|
||||
data &= ~mc_cg_ls_en[i];
|
||||
@@ -689,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
|
||||
orig = data = RREG32(mc_cg_registers[i]);
|
||||
if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
|
||||
data |= mc_cg_en[i];
|
||||
else
|
||||
data &= ~mc_cg_en[i];
|
||||
@@ -705,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
|
||||
|
||||
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
|
||||
|
||||
if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
|
||||
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
|
||||
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
|
||||
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
|
||||
@@ -728,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
|
||||
|
||||
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
|
||||
|
||||
if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
|
||||
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
|
||||
else
|
||||
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
|
||||
@@ -744,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
|
||||
|
||||
orig = data = RREG32(mmHDP_MEM_POWER_LS);
|
||||
|
||||
if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
|
||||
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
|
||||
else
|
||||
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
|
||||
@@ -752,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
|
||||
if (orig != data)
|
||||
WREG32(mmHDP_MEM_POWER_LS, data);
|
||||
}
|
||||
*/
|
||||
|
||||
static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
|
||||
{
|
||||
@@ -1098,6 +1096,20 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
|
||||
static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
|
||||
enum amd_clockgating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
bool gate = false;
|
||||
|
||||
if (state == AMD_CG_STATE_GATE)
|
||||
gate = true;
|
||||
|
||||
if (!(adev->flags & AMD_IS_APU)) {
|
||||
gmc_v6_0_enable_mc_mgcg(adev, gate);
|
||||
gmc_v6_0_enable_mc_ls(adev, gate);
|
||||
}
|
||||
gmc_v6_0_enable_bif_mgls(adev, gate);
|
||||
gmc_v6_0_enable_hdp_mgcg(adev, gate);
|
||||
gmc_v6_0_enable_hdp_ls(adev, gate);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1157,17 +1157,10 @@ static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
|
||||
static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
unsigned int i;
|
||||
u32 tmp;
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
/* read MC_STATUS */
|
||||
tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
|
||||
SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
|
||||
SRBM_STATUS__MCC_BUSY_MASK |
|
||||
SRBM_STATUS__MCD_BUSY_MASK |
|
||||
SRBM_STATUS__VMC_BUSY_MASK);
|
||||
if (!tmp)
|
||||
if (gmc_v7_0_is_idle(ip_block))
|
||||
return 0;
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
@@ -1213,10 +1213,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
|
||||
if (uncached) {
|
||||
mtype = MTYPE_UC;
|
||||
} else if (ext_coherent) {
|
||||
if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id)
|
||||
mtype = is_local ? MTYPE_CC : MTYPE_UC;
|
||||
else
|
||||
mtype = MTYPE_UC;
|
||||
mtype = is_local ? MTYPE_CC : MTYPE_UC;
|
||||
} else if (adev->flags & AMD_IS_APU) {
|
||||
mtype = is_local ? mtype_local : MTYPE_NC;
|
||||
} else {
|
||||
@@ -1336,7 +1333,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
|
||||
mtype_local = MTYPE_CC;
|
||||
|
||||
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
|
||||
} else if (adev->rev_id) {
|
||||
} else {
|
||||
/* MTYPE_UC case */
|
||||
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
|
||||
}
|
||||
@@ -2411,13 +2408,6 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
adev->gmc.flush_tlb_needs_extra_type_2 =
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
|
||||
adev->gmc.xgmi.num_physical_nodes;
|
||||
/*
|
||||
* TODO: This workaround is badly documented and had a buggy
|
||||
* implementation. We should probably verify what we do here.
|
||||
*/
|
||||
adev->gmc.flush_tlb_needs_extra_type_0 =
|
||||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
|
||||
adev->rev_id == 0;
|
||||
|
||||
/* The sequence of these two function calls matters.*/
|
||||
gmc_v9_0_init_golden_registers(adev);
|
||||
|
||||
@@ -36,17 +36,6 @@
|
||||
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
|
||||
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
|
||||
|
||||
static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
@@ -180,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
|
||||
};
|
||||
|
||||
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
|
||||
.flush_hdp = hdp_v4_0_flush_hdp,
|
||||
.flush_hdp = amdgpu_hdp_generic_flush,
|
||||
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
|
||||
.update_clock_gating = hdp_v4_0_update_clock_gating,
|
||||
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
|
||||
|
||||
@@ -27,17 +27,6 @@
|
||||
#include "hdp/hdp_5_0_0_sh_mask.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
@@ -217,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
|
||||
.flush_hdp = hdp_v5_0_flush_hdp,
|
||||
.flush_hdp = amdgpu_hdp_generic_flush,
|
||||
.invalidate_hdp = hdp_v5_0_invalidate_hdp,
|
||||
.update_clock_gating = hdp_v5_0_update_clock_gating,
|
||||
.get_clock_gating_state = hdp_v5_0_get_clockgating_state,
|
||||
|
||||
@@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
|
||||
0);
|
||||
RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
/* this is fine because SR_IOV doesn't remap the register */
|
||||
RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
} else {
|
||||
/* We just need to read back a register to post the write.
|
||||
* Reading back the remapped register causes problems on
|
||||
* some platforms so just read back the memory size register.
|
||||
*/
|
||||
if (adev->nbio.funcs->get_memsize)
|
||||
adev->nbio.funcs->get_memsize(adev);
|
||||
}
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring,
|
||||
(adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
|
||||
|
||||
@@ -30,17 +30,6 @@
|
||||
#define regHDP_CLK_CNTL_V6_1 0xd5
|
||||
#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0
|
||||
|
||||
static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
@@ -149,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
|
||||
.flush_hdp = hdp_v6_0_flush_hdp,
|
||||
.flush_hdp = amdgpu_hdp_generic_flush,
|
||||
.update_clock_gating = hdp_v6_0_update_clock_gating,
|
||||
.get_clock_gating_state = hdp_v6_0_get_clockgating_state,
|
||||
};
|
||||
|
||||
@@ -27,17 +27,6 @@
|
||||
#include "hdp/hdp_7_0_0_sh_mask.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg) {
|
||||
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
|
||||
} else {
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
@@ -137,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev,
|
||||
}
|
||||
|
||||
const struct amdgpu_hdp_funcs hdp_v7_0_funcs = {
|
||||
.flush_hdp = hdp_v7_0_flush_hdp,
|
||||
.flush_hdp = amdgpu_hdp_generic_flush,
|
||||
.update_clock_gating = hdp_v7_0_update_clock_gating,
|
||||
.get_clock_gating_state = hdp_v7_0_get_clockgating_state,
|
||||
};
|
||||
|
||||
@@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
ih[i]->overflow = false;
|
||||
}
|
||||
|
||||
/* update doorbell range for ih ring 0 */
|
||||
@@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
|
||||
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
|
||||
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
|
||||
goto out;
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
if (!amdgpu_sriov_vf(adev))
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
else
|
||||
ih->overflow = true;
|
||||
|
||||
/* When a ring buffer overflow happen start parsing interrupt
|
||||
* from the last not overwritten vector (wptr + 32). Hopefully
|
||||
|
||||
@@ -156,7 +156,7 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
|
||||
ring = &adev->jpeg.inst[i].ring_dec[j];
|
||||
ring->use_doorbell = false;
|
||||
ring->use_doorbell = true;
|
||||
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
ring->doorbell_index =
|
||||
@@ -264,7 +264,7 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
ring = &adev->jpeg.inst[i].ring_dec[j];
|
||||
if (ring->use_doorbell)
|
||||
WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
|
||||
(ring->pipe ? (ring->pipe - 0x15) : 0),
|
||||
ring->pipe,
|
||||
ring->doorbell_index <<
|
||||
VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
|
||||
VCN_JPEG_DB_CTRL__EN_MASK);
|
||||
|
||||
355
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
Normal file
@@ -0,0 +1,355 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright 2024 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "mes_userqueue.h"
|
||||
#include "amdgpu_userq_fence.h"
|
||||
|
||||
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
|
||||
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
|
||||
|
||||
static int
|
||||
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = amdgpu_bo_reserve(bo, true);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
|
||||
goto err_reserve_bo_failed;
|
||||
}
|
||||
|
||||
ret = amdgpu_ttm_alloc_gart(&bo->tbo);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
|
||||
goto err_map_bo_gart_failed;
|
||||
}
|
||||
|
||||
amdgpu_bo_unreserve(bo);
|
||||
bo = amdgpu_bo_ref(bo);
|
||||
|
||||
return 0;
|
||||
|
||||
err_map_bo_gart_failed:
|
||||
amdgpu_bo_unreserve(bo);
|
||||
err_reserve_bo_failed:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue,
|
||||
uint64_t wptr)
|
||||
{
|
||||
struct amdgpu_bo_va_mapping *wptr_mapping;
|
||||
struct amdgpu_vm *wptr_vm;
|
||||
struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
|
||||
int ret;
|
||||
|
||||
wptr_vm = queue->vm;
|
||||
ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
wptr &= AMDGPU_GMC_HOLE_MASK;
|
||||
wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
|
||||
amdgpu_bo_unreserve(wptr_vm->root.bo);
|
||||
if (!wptr_mapping) {
|
||||
DRM_ERROR("Failed to lookup wptr bo\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
wptr_obj->obj = wptr_mapping->bo_va->base.bo;
|
||||
if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
|
||||
DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to map wptr bo to GART\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int convert_to_mes_priority(int priority)
|
||||
{
|
||||
switch (priority) {
|
||||
case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
|
||||
default:
|
||||
return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
|
||||
case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
|
||||
return AMDGPU_MES_PRIORITY_LEVEL_LOW;
|
||||
case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
|
||||
return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
|
||||
case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
|
||||
return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
|
||||
}
|
||||
}
|
||||
|
||||
static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
|
||||
struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
|
||||
struct mes_add_queue_input queue_input;
|
||||
int r;
|
||||
|
||||
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
|
||||
|
||||
queue_input.process_va_start = 0;
|
||||
queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
|
||||
|
||||
/* set process quantum to 10 ms and gang quantum to 1 ms as default */
|
||||
queue_input.process_quantum = 100000;
|
||||
queue_input.gang_quantum = 10000;
|
||||
queue_input.paging = false;
|
||||
|
||||
queue_input.process_context_addr = ctx->gpu_addr;
|
||||
queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
|
||||
queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
|
||||
queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);
|
||||
|
||||
queue_input.process_id = queue->vm->pasid;
|
||||
queue_input.queue_type = queue->queue_type;
|
||||
queue_input.mqd_addr = queue->mqd.gpu_addr;
|
||||
queue_input.wptr_addr = userq_props->wptr_gpu_addr;
|
||||
queue_input.queue_size = userq_props->queue_size >> 2;
|
||||
queue_input.doorbell_offset = userq_props->doorbell_index;
|
||||
queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
|
||||
queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
|
||||
|
||||
amdgpu_mes_lock(&adev->mes);
|
||||
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct mes_remove_queue_input queue_input;
|
||||
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
|
||||
int r;
|
||||
|
||||
memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
|
||||
queue_input.doorbell_offset = queue->doorbell_index;
|
||||
queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
|
||||
|
||||
amdgpu_mes_lock(&adev->mes);
|
||||
r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
if (r)
|
||||
DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue,
|
||||
struct drm_amdgpu_userq_in *mqd_user)
|
||||
{
|
||||
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
|
||||
int r, size;
|
||||
|
||||
/*
|
||||
* The FW expects at least one page space allocated for
|
||||
* process ctx and gang ctx each. Create an object
|
||||
* for the same.
|
||||
*/
|
||||
size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
|
||||
r = amdgpu_userq_create_object(uq_mgr, ctx, size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct drm_amdgpu_userq_in *args_in,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
|
||||
struct drm_amdgpu_userq_in *mqd_user = args_in;
|
||||
struct amdgpu_mqd_prop *userq_props;
|
||||
int r;
|
||||
|
||||
/* Structure to initialize MQD for userqueue using generic MQD init function */
|
||||
userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
|
||||
if (!userq_props) {
|
||||
DRM_ERROR("Failed to allocate memory for userq_props\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (!mqd_user->wptr_va || !mqd_user->rptr_va ||
|
||||
!mqd_user->queue_va || mqd_user->queue_size == 0) {
|
||||
DRM_ERROR("Invalid MQD parameters for userqueue\n");
|
||||
r = -EINVAL;
|
||||
goto free_props;
|
||||
}
|
||||
|
||||
r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to create MQD object for userqueue\n");
|
||||
goto free_props;
|
||||
}
|
||||
|
||||
/* Initialize the MQD BO with user given values */
|
||||
userq_props->wptr_gpu_addr = mqd_user->wptr_va;
|
||||
userq_props->rptr_gpu_addr = mqd_user->rptr_va;
|
||||
userq_props->queue_size = mqd_user->queue_size;
|
||||
userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
|
||||
userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
|
||||
userq_props->use_doorbell = true;
|
||||
userq_props->doorbell_index = queue->doorbell_index;
|
||||
userq_props->fence_address = queue->fence_drv->gpu_addr;
|
||||
|
||||
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
|
||||
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
|
||||
|
||||
if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
|
||||
DRM_ERROR("Invalid compute IP MQD size\n");
|
||||
r = -EINVAL;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
|
||||
if (IS_ERR(compute_mqd)) {
|
||||
DRM_ERROR("Failed to read user MQD\n");
|
||||
r = -ENOMEM;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
userq_props->eop_gpu_addr = compute_mqd->eop_va;
|
||||
userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
|
||||
userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
|
||||
userq_props->hqd_active = false;
|
||||
userq_props->tmz_queue =
|
||||
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
|
||||
kfree(compute_mqd);
|
||||
} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
|
||||
struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
|
||||
|
||||
if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
|
||||
DRM_ERROR("Invalid GFX MQD\n");
|
||||
r = -EINVAL;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
|
||||
if (IS_ERR(mqd_gfx_v11)) {
|
||||
DRM_ERROR("Failed to read user MQD\n");
|
||||
r = -ENOMEM;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
|
||||
userq_props->csa_addr = mqd_gfx_v11->csa_va;
|
||||
userq_props->tmz_queue =
|
||||
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
|
||||
kfree(mqd_gfx_v11);
|
||||
} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
|
||||
struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
|
||||
|
||||
if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
|
||||
DRM_ERROR("Invalid SDMA MQD\n");
|
||||
r = -EINVAL;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
|
||||
if (IS_ERR(mqd_sdma_v11)) {
|
||||
DRM_ERROR("Failed to read sdma user MQD\n");
|
||||
r = -ENOMEM;
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
userq_props->csa_addr = mqd_sdma_v11->csa_va;
|
||||
kfree(mqd_sdma_v11);
|
||||
}
|
||||
|
||||
queue->userq_prop = userq_props;
|
||||
|
||||
r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to initialize MQD for userqueue\n");
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
/* Create BO for FW operations */
|
||||
r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
/* FW expects WPTR BOs to be mapped into GART */
|
||||
r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to create WPTR mapping\n");
|
||||
goto free_ctx;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
free_ctx:
|
||||
amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
|
||||
|
||||
free_mqd:
|
||||
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
|
||||
|
||||
free_props:
|
||||
kfree(userq_props);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void
|
||||
mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
|
||||
kfree(queue->userq_prop);
|
||||
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
|
||||
}
|
||||
|
||||
const struct amdgpu_userq_funcs userq_mes_funcs = {
|
||||
.mqd_create = mes_userq_mqd_create,
|
||||
.mqd_destroy = mes_userq_mqd_destroy,
|
||||
.unmap = mes_userq_unmap,
|
||||
.map = mes_userq_map,
|
||||
};
|
||||
30
drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
Normal file
@@ -0,0 +1,30 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright 2024 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MES_USERQ_H
|
||||
#define MES_USERQ_H
|
||||
#include "amdgpu_userq.h"
|
||||
|
||||
extern const struct amdgpu_userq_funcs userq_mes_funcs;
|
||||
#endif
|
||||
@@ -287,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int convert_to_mes_priority_level(int priority_level)
|
||||
{
|
||||
switch (priority_level) {
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_LOW:
|
||||
return AMD_PRIORITY_LEVEL_LOW;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
|
||||
default:
|
||||
return AMD_PRIORITY_LEVEL_NORMAL;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
|
||||
return AMD_PRIORITY_LEVEL_MEDIUM;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
|
||||
return AMD_PRIORITY_LEVEL_HIGH;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
|
||||
return AMD_PRIORITY_LEVEL_REALTIME;
|
||||
}
|
||||
}
|
||||
|
||||
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_add_queue_input *input)
|
||||
{
|
||||
@@ -310,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
|
||||
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
|
||||
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
|
||||
mes_add_queue_pkt.inprocess_gang_priority =
|
||||
input->inprocess_gang_priority;
|
||||
convert_to_mes_priority_level(input->inprocess_gang_priority);
|
||||
mes_add_queue_pkt.gang_global_priority_level =
|
||||
input->gang_global_priority_level;
|
||||
convert_to_mes_priority_level(input->gang_global_priority_level);
|
||||
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
|
||||
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
|
||||
|
||||
@@ -458,31 +475,6 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_queue_input *input)
|
||||
{
|
||||
if (input->use_mmio)
|
||||
return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
|
||||
input->me_id, input->pipe_id,
|
||||
input->queue_id, input->vmid);
|
||||
|
||||
union MESAPI__RESET mes_reset_queue_pkt;
|
||||
|
||||
memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
|
||||
|
||||
mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
|
||||
mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
|
||||
mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
|
||||
|
||||
mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
|
||||
mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr;
|
||||
/*mes_reset_queue_pkt.reset_queue_only = 1;*/
|
||||
|
||||
return mes_v11_0_submit_pkt_and_poll_completion(mes,
|
||||
&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
|
||||
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
|
||||
}
|
||||
|
||||
static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
|
||||
struct mes_map_legacy_queue_input *input)
|
||||
{
|
||||
@@ -649,7 +641,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
|
||||
break;
|
||||
case MES_MISC_OP_CHANGE_CONFIG:
|
||||
if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
|
||||
dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n");
|
||||
dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
|
||||
@@ -694,7 +686,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
|
||||
mes->compute_hqd_mask[i];
|
||||
|
||||
for (i = 0; i < MAX_GFX_PIPES; i++)
|
||||
mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
|
||||
mes_set_hw_res_pkt.gfx_hqd_mask[i] =
|
||||
mes->gfx_hqd_mask[i];
|
||||
|
||||
for (i = 0; i < MAX_SDMA_PIPES; i++)
|
||||
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
|
||||
@@ -723,7 +716,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
|
||||
mes->event_log_gpu_addr;
|
||||
}
|
||||
|
||||
if (enforce_isolation)
|
||||
if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
|
||||
mes_set_hw_res_pkt.limit_single_process = 1;
|
||||
|
||||
return mes_v11_0_submit_pkt_and_poll_completion(mes,
|
||||
@@ -753,8 +746,8 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
|
||||
offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
|
||||
}
|
||||
|
||||
static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_legacy_queue_input *input)
|
||||
static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_queue_input *input)
|
||||
{
|
||||
union MESAPI__RESET mes_reset_queue_pkt;
|
||||
|
||||
@@ -772,7 +765,7 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes,
|
||||
mes_reset_queue_pkt.queue_type =
|
||||
convert_to_mes_queue_type(input->queue_type);
|
||||
|
||||
if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
|
||||
if (input->legacy_gfx) {
|
||||
mes_reset_queue_pkt.reset_legacy_gfx = 1;
|
||||
mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
|
||||
mes_reset_queue_pkt.queue_id_lp = input->queue_id;
|
||||
@@ -798,7 +791,6 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
|
||||
.suspend_gang = mes_v11_0_suspend_gang,
|
||||
.resume_gang = mes_v11_0_resume_gang,
|
||||
.misc_op = mes_v11_0_misc_op,
|
||||
.reset_legacy_queue = mes_v11_0_reset_legacy_queue,
|
||||
.reset_hw_queue = mes_v11_0_reset_hw_queue,
|
||||
};
|
||||
|
||||
@@ -1701,22 +1693,10 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
/* it's only intended for use in mes_self_test case, not for s0ix and reset */
|
||||
if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend &&
|
||||
(amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3)))
|
||||
amdgpu_mes_self_test(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
|
||||
.name = "mes_v11_0",
|
||||
.early_init = mes_v11_0_early_init,
|
||||
.late_init = mes_v11_0_late_init,
|
||||
.late_init = NULL,
|
||||
.sw_init = mes_v11_0_sw_init,
|
||||
.sw_fini = mes_v11_0_sw_fini,
|
||||
.hw_init = mes_v11_0_hw_init,
|
||||
|
||||
@@ -274,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int convert_to_mes_priority_level(int priority_level)
|
||||
{
|
||||
switch (priority_level) {
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_LOW:
|
||||
return AMD_PRIORITY_LEVEL_LOW;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
|
||||
default:
|
||||
return AMD_PRIORITY_LEVEL_NORMAL;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
|
||||
return AMD_PRIORITY_LEVEL_MEDIUM;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
|
||||
return AMD_PRIORITY_LEVEL_HIGH;
|
||||
case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
|
||||
return AMD_PRIORITY_LEVEL_REALTIME;
|
||||
}
|
||||
}
|
||||
|
||||
static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_add_queue_input *input)
|
||||
{
|
||||
@@ -297,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
|
||||
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
|
||||
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
|
||||
mes_add_queue_pkt.inprocess_gang_priority =
|
||||
input->inprocess_gang_priority;
|
||||
convert_to_mes_priority_level(input->inprocess_gang_priority);
|
||||
mes_add_queue_pkt.gang_global_priority_level =
|
||||
input->gang_global_priority_level;
|
||||
convert_to_mes_priority_level(input->gang_global_priority_level);
|
||||
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
|
||||
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
|
||||
|
||||
@@ -477,32 +494,6 @@ static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_queue_input *input)
|
||||
{
|
||||
union MESAPI__RESET mes_reset_queue_pkt;
|
||||
int pipe;
|
||||
|
||||
memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
|
||||
|
||||
mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
|
||||
mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
|
||||
mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
|
||||
|
||||
mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
|
||||
mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr;
|
||||
/*mes_reset_queue_pkt.reset_queue_only = 1;*/
|
||||
|
||||
if (mes->adev->enable_uni_mes)
|
||||
pipe = AMDGPU_MES_KIQ_PIPE;
|
||||
else
|
||||
pipe = AMDGPU_MES_SCHED_PIPE;
|
||||
|
||||
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
|
||||
&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
|
||||
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
|
||||
}
|
||||
|
||||
static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
|
||||
struct mes_map_legacy_queue_input *input)
|
||||
{
|
||||
@@ -762,7 +753,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
|
||||
pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
|
||||
}
|
||||
|
||||
if (enforce_isolation)
|
||||
if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
|
||||
mes_set_hw_res_pkt.limit_single_process = 1;
|
||||
|
||||
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
|
||||
@@ -845,8 +836,8 @@ static void mes_v12_0_enable_unmapped_doorbell_handling(
|
||||
WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
|
||||
}
|
||||
|
||||
static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_legacy_queue_input *input)
|
||||
static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
|
||||
struct mes_reset_queue_input *input)
|
||||
{
|
||||
union MESAPI__RESET mes_reset_queue_pkt;
|
||||
int pipe;
|
||||
@@ -865,7 +856,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
|
||||
mes_reset_queue_pkt.queue_type =
|
||||
convert_to_mes_queue_type(input->queue_type);
|
||||
|
||||
if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
|
||||
if (input->legacy_gfx) {
|
||||
mes_reset_queue_pkt.reset_legacy_gfx = 1;
|
||||
mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
|
||||
mes_reset_queue_pkt.queue_id_lp = input->queue_id;
|
||||
@@ -878,7 +869,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
|
||||
mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
|
||||
}
|
||||
|
||||
if (mes->adev->enable_uni_mes)
|
||||
if (input->is_kq)
|
||||
pipe = AMDGPU_MES_KIQ_PIPE;
|
||||
else
|
||||
pipe = AMDGPU_MES_SCHED_PIPE;
|
||||
@@ -896,7 +887,6 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
|
||||
.suspend_gang = mes_v12_0_suspend_gang,
|
||||
.resume_gang = mes_v12_0_resume_gang,
|
||||
.misc_op = mes_v12_0_misc_op,
|
||||
.reset_legacy_queue = mes_v12_0_reset_legacy_queue,
|
||||
.reset_hw_queue = mes_v12_0_reset_hw_queue,
|
||||
};
|
||||
|
||||
@@ -1811,21 +1801,10 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_v12_0_late_init(struct amdgpu_ip_block *ip_block)
|
||||
{
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
/* it's only intended for use in mes_self_test case, not for s0ix and reset */
|
||||
if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend)
|
||||
amdgpu_mes_self_test(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
|
||||
.name = "mes_v12_0",
|
||||
.early_init = mes_v12_0_early_init,
|
||||
.late_init = mes_v12_0_late_init,
|
||||
.late_init = NULL,
|
||||
.sw_init = mes_v12_0_sw_init,
|
||||
.sw_fini = mes_v12_0_sw_fini,
|
||||
.hw_init = mes_v12_0_hw_init,
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "soc15_common.h"
|
||||
#include "soc15.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_psp.h"
|
||||
|
||||
#define regVM_L2_CNTL3_DEFAULT 0x80100007
|
||||
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
|
||||
@@ -192,10 +193,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
|
||||
uint32_t tmp, inst_mask;
|
||||
int i;
|
||||
|
||||
/* Setup TLB control */
|
||||
inst_mask = adev->aid_mask;
|
||||
for_each_inst(i, inst_mask) {
|
||||
tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
|
||||
if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
|
||||
tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
|
||||
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
|
||||
1);
|
||||
@@ -209,7 +208,26 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
|
||||
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
|
||||
|
||||
WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
|
||||
} else {
|
||||
inst_mask = adev->aid_mask;
|
||||
for_each_inst(i, inst_mask) {
|
||||
tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
|
||||
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
|
||||
1);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
SYSTEM_ACCESS_MODE, 3);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 1);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
|
||||
|
||||
WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,6 +239,9 @@ static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev)
|
||||
uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
|
||||
regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return;
|
||||
|
||||
inst_mask = adev->aid_mask;
|
||||
for_each_inst(i, inst_mask) {
|
||||
for (j = 0; j < 5; j++) { /* DAGB instances */
|
||||
@@ -454,6 +475,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp;
|
||||
u32 i, inst_mask;
|
||||
|
||||
if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
|
||||
tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 0);
|
||||
psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
|
||||
} else {
|
||||
inst_mask = adev->aid_mask;
|
||||
for_each_inst(i, inst_mask) {
|
||||
tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
|
||||
0);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 0);
|
||||
WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_vmhub *hub;
|
||||
@@ -467,15 +512,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
|
||||
for (i = 0; i < 16; i++)
|
||||
WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
|
||||
i * hub->ctx_distance, 0);
|
||||
|
||||
/* Setup TLB control */
|
||||
tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
|
||||
0);
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL, 0);
|
||||
WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
|
||||
@@ -485,6 +521,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
|
||||
WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
|
||||
}
|
||||
}
|
||||
|
||||
mmhub_v1_8_disable_l1_tlb(adev);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -274,6 +274,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
|
||||
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
|
||||
struct amdgpu_reset_context reset_context = { 0 };
|
||||
|
||||
amdgpu_virt_fini_data_exchange(adev);
|
||||
|
||||
@@ -281,8 +282,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
|
||||
if (amdgpu_device_should_recover_gpu(adev)
|
||||
&& (!amdgpu_device_has_job_running(adev) ||
|
||||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
|
||||
struct amdgpu_reset_context reset_context;
|
||||
memset(&reset_context, 0, sizeof(reset_context));
|
||||
|
||||
reset_context.method = AMD_RESET_METHOD_NONE;
|
||||
reset_context.reset_req_dev = adev;
|
||||
@@ -293,6 +292,19 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
|
||||
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
|
||||
|
||||
if (down_read_trylock(&adev->reset_domain->sem)) {
|
||||
amdgpu_virt_fini_data_exchange(adev);
|
||||
amdgpu_virt_request_bad_pages(adev);
|
||||
amdgpu_virt_init_data_exchange(adev);
|
||||
up_read(&adev->reset_domain->sem);
|
||||
}
|
||||
}
|
||||
|
||||
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *src,
|
||||
unsigned type,
|
||||
@@ -312,26 +324,42 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
||||
switch (event) {
|
||||
case IDH_FLR_NOTIFICATION:
|
||||
case IDH_RAS_BAD_PAGES_NOTIFICATION:
|
||||
xgpu_ai_mailbox_send_ack(adev);
|
||||
if (amdgpu_sriov_runtime(adev))
|
||||
schedule_work(&adev->virt.bad_pages_work);
|
||||
break;
|
||||
case IDH_UNRECOV_ERR_NOTIFICATION:
|
||||
xgpu_ai_mailbox_send_ack(adev);
|
||||
ras->is_rma = true;
|
||||
dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
|
||||
if (amdgpu_sriov_runtime(adev))
|
||||
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
|
||||
&adev->virt.flr_work),
|
||||
"Failed to queue work! at %s",
|
||||
__func__);
|
||||
&adev->virt.flr_work),
|
||||
"Failed to queue work! at %s",
|
||||
__func__);
|
||||
break;
|
||||
case IDH_QUERY_ALIVE:
|
||||
xgpu_ai_mailbox_send_ack(adev);
|
||||
break;
|
||||
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
|
||||
* it byfar since that polling thread will handle it,
|
||||
* other msg like flr complete is not handled here.
|
||||
*/
|
||||
case IDH_CLR_MSG_BUF:
|
||||
case IDH_FLR_NOTIFICATION_CMPL:
|
||||
case IDH_READY_TO_ACCESS_GPU:
|
||||
default:
|
||||
case IDH_FLR_NOTIFICATION:
|
||||
if (amdgpu_sriov_runtime(adev))
|
||||
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
|
||||
&adev->virt.flr_work),
|
||||
"Failed to queue work! at %s",
|
||||
__func__);
|
||||
break;
|
||||
case IDH_QUERY_ALIVE:
|
||||
xgpu_ai_mailbox_send_ack(adev);
|
||||
break;
|
||||
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
|
||||
* it byfar since that polling thread will handle it,
|
||||
* other msg like flr complete is not handled here.
|
||||
*/
|
||||
case IDH_CLR_MSG_BUF:
|
||||
case IDH_FLR_NOTIFICATION_CMPL:
|
||||
case IDH_READY_TO_ACCESS_GPU:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -387,6 +415,7 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
|
||||
INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -40,6 +40,7 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
IDH_RAS_POISON = 202,
IDH_REQ_RAS_BAD_PAGES = 205,
};

enum idh_event {
@@ -54,6 +55,9 @@ enum idh_event {
IDH_RAS_POISON_READY,
IDH_PF_SOFT_FLR_NOTIFICATION,
IDH_RAS_ERROR_DETECTED,
IDH_RAS_BAD_PAGES_READY = 15,
IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_TEXT_MESSAGE = 255,
};

Some files were not shown because too many files have changed in this diff.