From cb7ff314e1d9f3d6c62fa2c392e41174721ed0b3 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 6 Nov 2020 09:14:20 +0000
Subject: [PATCH 01/22] drm/tegra: sor: Don't warn on probe deferral

Deferred probe is an expected return value for tegra_output_probe().
Given that the driver deals with it properly, there's no need to output
a warning that may potentially confuse users.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index e88a17c2937f..a5b944dacb35 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3764,10 +3764,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		return err;
 
 	err = tegra_output_probe(&sor->output);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to probe output: %d\n", err);
-		return err;
-	}
+	if (err < 0)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to probe output\n");
 
 	if (sor->ops && sor->ops->probe) {
 		err = sor->ops->probe(sor);

From 5c1d644c09dbc13b2dc652435786e42b05ac1bb7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 Oct 2020 10:51:58 +0100
Subject: [PATCH 02/22] drm/tegra: sor: Ensure regulators are disabled on
 teardown

The Tegra SOR driver uses the devm infrastructure to request regulators,
but enables them without registering them with the infrastructure.

This results in the following splat if probing fails for any odd resaon
(such as dependencies not being available):

[    8.974187] tegra-sor 15580000.sor: cannot get HDMI supply: -517
[    9.414403] tegra-sor 15580000.sor: failed to probe HDMI: -517
[    9.421240] ------------[ cut here ]------------
[    9.425879] WARNING: CPU: 1 PID: 164 at drivers/regulator/core.c:2089 _regulator_put.part.0+0x16c/0x174
[    9.435259] Modules linked in: tegra_drm(E+) cec(E) ahci_tegra(E) drm_kms_helper(E) drm(E) libahci_platform(E) libahci(E) max77620_regulator(E) xhci_tegra(E+) sdhci_tegra(E) xhci_hcd(E) libata(E) sdhci_pltfm(E) cqhci(E) fixed(E) usbcore(E) scsi_mod(E) sdhci(E) host1x(E)
[    9.459211] CPU: 1 PID: 164 Comm: systemd-udevd Tainted: G S    D W   E     5.9.0-rc7-00298-gf6337624c4fe #1980
[    9.469285] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT)
[    9.475202] pstate: 80000005 (Nzcv daif -PAN -UAO BTYPE=--)
[    9.480784] pc : _regulator_put.part.0+0x16c/0x174
[    9.485581] lr : regulator_put+0x44/0x60
[    9.489501] sp : ffffffc011d837b0
[    9.492814] x29: ffffffc011d837b0 x28: ffffff81dd085900
[    9.498141] x27: ffffff81de1c8ec0 x26: ffffff81de1c8c10
[    9.503464] x25: ffffff81dd085800 x24: ffffffc008f2c6b0
[    9.508790] x23: ffffffc0117373f0 x22: 0000000000000005
[    9.514101] x21: ffffff81dd085900 x20: ffffffc01172b098
[    9.515822] ata1: SATA link down (SStatus 0 SControl 300)
[    9.519426] x19: ffffff81dd085100 x18: 0000000000000030
[    9.530122] x17: 0000000000000000 x16: 0000000000000000
[    9.535453] x15: 0000000000000000 x14: 000000000000038f
[    9.540777] x13: 0000000000000003 x12: 0000000000000040
[    9.546105] x11: ffffff81eb800000 x10: 0000000000000ae0
[    9.551417] x9 : ffffffc0106fea24 x8 : ffffff81de83e6c0
[    9.556728] x7 : 0000000000000018 x6 : 00000000000003c3
[    9.562064] x5 : 0000000000005660 x4 : 0000000000000000
[    9.567392] x3 : ffffffc01172b388 x2 : ffffff81de83db80
[    9.572702] x1 : 0000000000000000 x0 : 0000000000000001
[    9.578034] Call trace:
[    9.580494]  _regulator_put.part.0+0x16c/0x174
[    9.584940]  regulator_put+0x44/0x60
[    9.588522]  devm_regulator_release+0x20/0x2c
[    9.592885]  release_nodes+0x1c8/0x2c0
[    9.596636]  devres_release_all+0x44/0x6c
[    9.600649]  really_probe+0x1ec/0x504
[    9.604316]  driver_probe_device+0x100/0x170
[    9.608589]  device_driver_attach+0xcc/0xd4
[    9.612774]  __driver_attach+0xb0/0x17c
[    9.616614]  bus_for_each_dev+0x7c/0xd4
[    9.620450]  driver_attach+0x30/0x3c
[    9.624027]  bus_add_driver+0x154/0x250
[    9.627867]  driver_register+0x84/0x140
[    9.631719]  __platform_register_drivers+0xa0/0x180
[    9.636660]  host1x_drm_init+0x60/0x1000 [tegra_drm]
[    9.641629]  do_one_initcall+0x54/0x2d0
[    9.645490]  do_init_module+0x68/0x29c
[    9.649244]  load_module+0x2178/0x26c0
[    9.652997]  __do_sys_finit_module+0xb0/0x120
[    9.657356]  __arm64_sys_finit_module+0x2c/0x40
[    9.661902]  el0_svc_common.constprop.0+0x80/0x240
[    9.666701]  do_el0_svc+0x30/0xa0
[    9.670022]  el0_svc+0x18/0x50
[    9.673081]  el0_sync_handler+0x90/0x318
[    9.677006]  el0_sync+0x158/0x180
[    9.680324] ---[ end trace 90f6c89d62d85ff6 ]---

Instead, let's register a callback that will disable the regulators
on teardown. This allows for the removal of the .remove callbacks,
which are not needed anymore.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 59 +++++++++++++++----------------------
 1 file changed, 24 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index a5b944dacb35..00656d8b98e2 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -397,7 +397,6 @@ struct tegra_sor;
 struct tegra_sor_ops {
 	const char *name;
 	int (*probe)(struct tegra_sor *sor);
-	int (*remove)(struct tegra_sor *sor);
 	void (*audio_enable)(struct tegra_sor *sor);
 	void (*audio_disable)(struct tegra_sor *sor);
 };
@@ -2942,6 +2941,24 @@ static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
 	.atomic_check = tegra_sor_encoder_atomic_check,
 };
 
+static void tegra_sor_disable_regulator(void *data)
+{
+	struct regulator *reg = data;
+
+	regulator_disable(reg);
+}
+
+static int tegra_sor_enable_regulator(struct tegra_sor *sor, struct regulator *reg)
+{
+	int err;
+
+	err = regulator_enable(reg);
+	if (err)
+		return err;
+
+	return devm_add_action_or_reset(sor->dev, tegra_sor_disable_regulator, reg);
+}
+
 static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 {
 	int err;
@@ -2953,7 +2970,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 		return PTR_ERR(sor->avdd_io_supply);
 	}
 
-	err = regulator_enable(sor->avdd_io_supply);
+	err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
 			err);
@@ -2967,7 +2984,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 		return PTR_ERR(sor->vdd_pll_supply);
 	}
 
-	err = regulator_enable(sor->vdd_pll_supply);
+	err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
 			err);
@@ -2981,7 +2998,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 		return PTR_ERR(sor->hdmi_supply);
 	}
 
-	err = regulator_enable(sor->hdmi_supply);
+	err = tegra_sor_enable_regulator(sor, sor->hdmi_supply);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
 		return err;
@@ -2992,19 +3009,9 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 	return 0;
 }
 
-static int tegra_sor_hdmi_remove(struct tegra_sor *sor)
-{
-	regulator_disable(sor->hdmi_supply);
-	regulator_disable(sor->vdd_pll_supply);
-	regulator_disable(sor->avdd_io_supply);
-
-	return 0;
-}
-
 static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
 	.name = "HDMI",
 	.probe = tegra_sor_hdmi_probe,
-	.remove = tegra_sor_hdmi_remove,
 	.audio_enable = tegra_sor_hdmi_audio_enable,
 	.audio_disable = tegra_sor_hdmi_audio_disable,
 };
@@ -3017,7 +3024,7 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor)
 	if (IS_ERR(sor->avdd_io_supply))
 		return PTR_ERR(sor->avdd_io_supply);
 
-	err = regulator_enable(sor->avdd_io_supply);
+	err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
 	if (err < 0)
 		return err;
 
@@ -3025,25 +3032,16 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor)
 	if (IS_ERR(sor->vdd_pll_supply))
 		return PTR_ERR(sor->vdd_pll_supply);
 
-	err = regulator_enable(sor->vdd_pll_supply);
+	err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
 	if (err < 0)
 		return err;
 
 	return 0;
 }
 
-static int tegra_sor_dp_remove(struct tegra_sor *sor)
-{
-	regulator_disable(sor->vdd_pll_supply);
-	regulator_disable(sor->avdd_io_supply);
-
-	return 0;
-}
-
 static const struct tegra_sor_ops tegra_sor_dp_ops = {
 	.name = "DP",
 	.probe = tegra_sor_dp_probe,
-	.remove = tegra_sor_dp_remove,
 };
 
 static int tegra_sor_init(struct host1x_client *client)
@@ -3773,7 +3771,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		if (err < 0) {
 			dev_err(&pdev->dev, "failed to probe %s: %d\n",
 				sor->ops->name, err);
-			goto output;
+			goto remove;
 		}
 	}
 
@@ -3954,9 +3952,6 @@ static int tegra_sor_probe(struct platform_device *pdev)
 rpm_disable:
 	pm_runtime_disable(&pdev->dev);
 remove:
-	if (sor->ops && sor->ops->remove)
-		sor->ops->remove(sor);
-output:
 	tegra_output_remove(&sor->output);
 	return err;
 }
@@ -3975,12 +3970,6 @@ static int tegra_sor_remove(struct platform_device *pdev)
 
 	pm_runtime_disable(&pdev->dev);
 
-	if (sor->ops && sor->ops->remove) {
-		err = sor->ops->remove(sor);
-		if (err < 0)
-			dev_err(&pdev->dev, "failed to remove SOR: %d\n", err);
-	}
-
 	tegra_output_remove(&sor->output);
 
 	return 0;

From 41f71629b4c432f8dd47d70ace813be5f79d4d75 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <mh12gx2825@gmail.com>
Date: Thu, 5 Nov 2020 23:29:28 +0530
Subject: [PATCH 03/22] drm/tegra: replace idr_init() by idr_init_base()

idr_init() uses base 0 which is an invalid identifier for this driver.
The new function idr_init_base allows IDR to set the ID lookup from
base 1. This avoids all lookups that otherwise starts from 0 since
0 is always unused.

References: commit 6ce711f27500 ("idr: Make 1-based IDRs more efficient")

Signed-off-by: Deepak R Varma <mh12gx2825@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ba9d1c3e7cac..e4baf07992a4 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -90,7 +90,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
 	if (!fpriv)
 		return -ENOMEM;
 
-	idr_init(&fpriv->contexts);
+	idr_init_base(&fpriv->contexts, 1);
 	mutex_init(&fpriv->lock);
 	filp->driver_priv = fpriv;
 

From 123f01a0c989905a1cef6c1397a022eb321474d8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 13 Nov 2020 21:38:35 +0100
Subject: [PATCH 04/22] drm/tegra: output: Do not put OF node twice

The original patch for commit 3d2e7aec7013 ("drm/tegra: output: Don't
leak OF node on error") contained this hunk, but it was accidentally
dropped during conflict resolution. This causes use-after-free errors
on devices that use an I2C controller for HDMI DDC/CI on Tegra210 and
later.

Fixes: 3d2e7aec7013 ("drm/tegra: output: Don't leak OF node on error")
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/output.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 5a4fd0dbf4cf..47d26b5d9945 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -129,7 +129,6 @@ int tegra_output_probe(struct tegra_output *output)
 
 		if (!output->ddc) {
 			err = -EPROBE_DEFER;
-			of_node_put(ddc);
 			return err;
 		}
 	}

From 777ee15e88616c275ba59db88d3ece20eae0ca9a Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 20 Nov 2020 22:13:06 +0100
Subject: [PATCH 05/22] drm: mxsfb: fix fence synchronization

The conversion away from the simple display pipeline helper missed
to convert the prepare_fb plane callback, so no fences are attached to
the atomic state, breaking synchronization with other devices. Fix
this by plugging in the drm_gem_fb_prepare_fb helper function.

Fixes: ae1ed0093281 ("drm: mxsfb: Stop using DRM simple display pipeline helper")
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20201120211306.325841-1-l.stach@pengutronix.de
---
 drivers/gpu/drm/mxsfb/mxsfb_kms.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
index b721b8b262ce..4d556532281a 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
@@ -22,6 +22,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_plane.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_vblank.h>
@@ -485,11 +486,13 @@ static void mxsfb_plane_overlay_atomic_update(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = {
+	.prepare_fb = drm_gem_fb_prepare_fb,
 	.atomic_check = mxsfb_plane_atomic_check,
 	.atomic_update = mxsfb_plane_primary_atomic_update,
 };
 
 static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = {
+	.prepare_fb = drm_gem_fb_prepare_fb,
 	.atomic_check = mxsfb_plane_atomic_check,
 	.atomic_update = mxsfb_plane_overlay_atomic_update,
 };

From c70582bbf299986234ecf06d128454b4b38ecd2e Mon Sep 17 00:00:00 2001
From: Daniel Abrecht <public@danielabrecht.ch>
Date: Sun, 8 Nov 2020 21:00:01 +0000
Subject: [PATCH 06/22] drm: mxsfb: Implement .format_mod_supported

This will make sure applications which use the IN_FORMATS blob
to figure out which modifiers they can use will pick up the
linear modifier which is needed by mxsfb. Such applications
will not work otherwise if an incompatible implicit modifier
ends up being selected.

Before commit ae1ed0093281 ("drm: mxsfb: Stop using DRM simple
display pipeline helper"), the DRM simple display pipeline
helper took care of this.

Signed-off-by: Daniel Abrecht <public@danielabrecht.ch>
Fixes: ae1ed0093281 ("drm: mxsfb: Stop using DRM simple display pipeline helper")
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/2a99ffffc2378209307e0992a6e97e70@nodmarc.danielabrecht.ch
---
 drivers/gpu/drm/mxsfb/mxsfb_kms.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
index 4d556532281a..9e1224d54729 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
@@ -485,6 +485,13 @@ static void mxsfb_plane_overlay_atomic_update(struct drm_plane *plane,
 	writel(ctrl, mxsfb->base + LCDC_AS_CTRL);
 }
 
+static bool mxsfb_format_mod_supported(struct drm_plane *plane,
+				       uint32_t format,
+				       uint64_t modifier)
+{
+	return modifier == DRM_FORMAT_MOD_LINEAR;
+}
+
 static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = {
 	.prepare_fb = drm_gem_fb_prepare_fb,
 	.atomic_check = mxsfb_plane_atomic_check,
@@ -498,6 +505,7 @@ static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = {
 };
 
 static const struct drm_plane_funcs mxsfb_plane_funcs = {
+	.format_mod_supported	= mxsfb_format_mod_supported,
 	.update_plane		= drm_atomic_helper_update_plane,
 	.disable_plane		= drm_atomic_helper_disable_plane,
 	.destroy		= drm_plane_cleanup,

From aea656b0d05ec5b8ed5beb2f94c4dd42ea834e9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 26 Nov 2020 13:35:08 +0100
Subject: [PATCH 07/22] drm/nouveau: make sure ret is initialized in
 nouveau_ttm_io_mem_reserve
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This wasn't initialized for pre NV50 hardware.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reported-and-Tested-by: Mark Hounschell <markh@compro.net>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Link: https://patchwork.freedesktop.org/series/84298/
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 2ee75646ad6f..d02f3a58adb6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1215,8 +1215,8 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_resource *reg)
 			}
 
 			reg->bus.offset = handle;
-			ret = 0;
 		}
+		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;

From bf3a3cdcad40e5928a22ea0fd200d17fd6d6308d Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Fri, 30 Oct 2020 09:34:24 +0800
Subject: [PATCH 08/22] drm/tegra: sor: Disable clocks on error in
 tegra_sor_init()

Fix the missing clk_disable_unprepare() before return from
tegra_sor_init() in the error handling case.

Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 00656d8b98e2..cc2aa2308a51 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3143,6 +3143,7 @@ static int tegra_sor_init(struct host1x_client *client)
 		if (err < 0) {
 			dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
 				err);
+			clk_disable_unprepare(sor->clk);
 			return err;
 		}
 
@@ -3150,12 +3151,17 @@ static int tegra_sor_init(struct host1x_client *client)
 	}
 
 	err = clk_prepare_enable(sor->clk_safe);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(sor->clk);
 		return err;
+	}
 
 	err = clk_prepare_enable(sor->clk_dp);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(sor->clk_safe);
+		clk_disable_unprepare(sor->clk);
 		return err;
+	}
 
 	return 0;
 }

From aec9fe892812ed10d0bffcf309d2a8fc380d8ce6 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Date: Tue, 10 Nov 2020 21:04:30 +0100
Subject: [PATCH 09/22] drm/rockchip: Avoid uninitialized use of endpoint id in
 LVDS

In the Rockchip DRM LVDS component driver, the endpoint id provided to
drm_of_find_panel_or_bridge is grabbed from the endpoint's reg property.

However, the property may be missing in the case of a single endpoint.
Initialize the endpoint_id variable to 0 to avoid using an
uninitialized variable in that case.

Fixes: 34cc0aa25456 ("drm/rockchip: Add support for Rockchip Soc LVDS")
Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20201110200430.1713467-1-paul.kocialkowski@bootlin.com
---
 drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c
index f292c6a6e20f..41edd0a421b2 100644
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c
@@ -544,7 +544,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master,
 	struct device_node  *port, *endpoint;
 	int ret = 0, child_count = 0;
 	const char *name;
-	u32 endpoint_id;
+	u32 endpoint_id = 0;
 
 	lvds->drm_dev = drm_dev;
 	port = of_graph_get_port_by_id(dev->of_node, 1);

From 7c4bada12d320d8648ba3ede6f9b6f9e10f1126a Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Fri, 27 Nov 2020 21:04:29 +0100
Subject: [PATCH 10/22] drm/panel: sony-acx565akm: Fix race condition in probe

The probe routine acquires the reset GPIO using GPIOD_OUT_LOW. Directly
afterwards it calls acx565akm_detect(), which sets the GPIO value to
HIGH. If the bootloader initialized the GPIO to HIGH before the probe
routine was called, there is only a very short time period of a few
instructions where the reset signal is LOW. Exact time depends on
compiler optimizations, kernel configuration and alignment of the stars,
but I expect it to be always way less than 10us. There are no public
datasheets for the panel, but acx565akm_power_on() has a comment with
timings and reset period should be at least 10us. So this potentially
brings the panel into a half-reset state.

The result is, that panel may not work after boot and can get into a
working state by re-enabling it (e.g. by blanking + unblanking), since
that does a clean reset cycle. This bug has recently been hit by Ivaylo
Dimitrov, but there are some older reports which are probably the same
bug. At least Tony Lindgren, Peter Ujfalusi and Jarkko Nikula have
experienced it in 2017 describing the blank/unblank procedure as
possible workaround.

Note, that the bug really goes back in time. It has originally been
introduced in the predecessor of the omapfb driver in commit 3c45d05be382
("OMAPDSS: acx565akm panel: handle gpios in panel driver") in 2012.
That driver eventually got replaced by a newer one, which had the bug
from the beginning in commit 84192742d9c2 ("OMAPDSS: Add Sony ACX565AKM
panel driver") and still exists in fbdev world. That driver has later
been copied to omapdrm and then was used as a basis for this driver.
Last but not least the omapdrm specific driver has been removed in
commit 45f16c82db7e ("drm/omap: displays: Remove unused panel drivers").

Reported-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Reported-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reported-by: Tony Lindgren <tony@atomide.com>
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reported-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Cc: Merlijn Wajer <merlijn@wizzup.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Fixes: 1c8fc3f0c5d2 ("drm/panel: Add driver for the Sony ACX565AKM panel")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Tested-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Tested-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20201127200429.129868-1-sebastian.reichel@collabora.com
---
 drivers/gpu/drm/panel/panel-sony-acx565akm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
index e95fdfb16b6c..ba0b3ead150f 100644
--- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
@@ -629,7 +629,7 @@ static int acx565akm_probe(struct spi_device *spi)
 	lcd->spi = spi;
 	mutex_init(&lcd->mutex);
 
-	lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW);
+	lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_HIGH);
 	if (IS_ERR(lcd->reset_gpio)) {
 		dev_err(&spi->dev, "failed to get reset GPIO\n");
 		return PTR_ERR(lcd->reset_gpio);

From fd4e788e971ce763e50762d7b1a0048992949dd0 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Fri, 27 Nov 2020 10:52:41 +0200
Subject: [PATCH 11/22] drm/omap: sdi: fix bridge enable/disable

When the SDI output was converted to DRM bridge, the atomic versions of
enable and disable funcs were used. This was not intended, as that would
require implementing other atomic funcs too. This leads to:

WARNING: CPU: 0 PID: 18 at drivers/gpu/drm/drm_bridge.c:708 drm_atomic_helper_commit_modeset_enables+0x134/0x268

and display not working.

Fix this by using the legacy enable/disable funcs.

Fixes: 8bef8a6d5da81b909a190822b96805a47348146f ("drm/omap: sdi: Register a drm_bridge")
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Tested-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: stable@vger.kernel.org # v5.7+
Link: https://patchwork.freedesktop.org/patch/msgid/20201127085241.848461-1-tomi.valkeinen@ti.com
---
 drivers/gpu/drm/omapdrm/dss/sdi.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index 033fd30074b0..282e4c837cd9 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -195,8 +195,7 @@ static void sdi_bridge_mode_set(struct drm_bridge *bridge,
 	sdi->pixelclock = adjusted_mode->clock * 1000;
 }
 
-static void sdi_bridge_enable(struct drm_bridge *bridge,
-			      struct drm_bridge_state *bridge_state)
+static void sdi_bridge_enable(struct drm_bridge *bridge)
 {
 	struct sdi_device *sdi = drm_bridge_to_sdi(bridge);
 	struct dispc_clock_info dispc_cinfo;
@@ -259,8 +258,7 @@ static void sdi_bridge_enable(struct drm_bridge *bridge,
 	regulator_disable(sdi->vdds_sdi_reg);
 }
 
-static void sdi_bridge_disable(struct drm_bridge *bridge,
-			       struct drm_bridge_state *bridge_state)
+static void sdi_bridge_disable(struct drm_bridge *bridge)
 {
 	struct sdi_device *sdi = drm_bridge_to_sdi(bridge);
 
@@ -278,8 +276,8 @@ static const struct drm_bridge_funcs sdi_bridge_funcs = {
 	.mode_valid = sdi_bridge_mode_valid,
 	.mode_fixup = sdi_bridge_mode_fixup,
 	.mode_set = sdi_bridge_mode_set,
-	.atomic_enable = sdi_bridge_enable,
-	.atomic_disable = sdi_bridge_disable,
+	.enable = sdi_bridge_enable,
+	.disable = sdi_bridge_disable,
 };
 
 static void sdi_bridge_init(struct sdi_device *sdi)

From 777a7717d60ccdc9b84f35074f848d3f746fc3bf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 26 Nov 2020 14:08:41 +0000
Subject: [PATCH 12/22] drm/i915/gt: Program mocs:63 for cache eviction on gen9
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ville noticed that the last mocs entry is used unconditionally by the HW
when it performs cache evictions, and noted that while the value is not
meant to be writable by the driver, we should program it to a reasonable
value nevertheless.

As it turns out, we can change the value of mocs:63 and the value we
were programming into it would cause hard hangs in conjunction with
atomic operations.

v2: Add details from bspec about how it is used by HW

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2707
Fixes: 3bbaba0ceaa2 ("drm/i915: Added Programming of the MOCS")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: <stable@vger.kernel.org> # v4.3+
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201126140841.1982-1-chris@chris-wilson.co.uk
(cherry picked from commit 977933b5da7c16f39295c4c1d4259a58ece65dbe)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 313e51e7d4f7..4f74706967fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -131,7 +131,19 @@ static const struct drm_i915_mocs_entry skl_mocs_table[] = {
 	GEN9_MOCS_ENTRIES,
 	MOCS_ENTRY(I915_MOCS_CACHED,
 		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-		   L3_3_WB)
+		   L3_3_WB),
+
+	/*
+	 * mocs:63
+	 * - used by the L3 for all of its evictions.
+	 *   Thus it is expected to allow LLC cacheability to enable coherent
+	 *   flows to be maintained.
+	 * - used to force L3 uncachable cycles.
+	 *   Thus it is expected to make the surface L3 uncacheable.
+	 */
+	MOCS_ENTRY(63,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC)
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */

From 9261a1db80bc81dd445cd6dcfb466b632ad9faa8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 26 Nov 2020 14:04:05 +0000
Subject: [PATCH 13/22] drm/i915/gt: Protect context lifetime with RCU

Allow a brief period for continued access to a dead intel_context by
deferring the release of the struct until after an RCU grace period.
As we are using a dedicated slab cache for the contexts, we can defer
the release of the slab pages via RCU, with the caveat that individual
structs may be reused from the freelist within an RCU grace period. To
handle that, we have to avoid clearing members of the zombie struct.

This is required for a later patch to handle locking around virtual
requests in the signaler, as those requests may want to move between
engines and be destroyed while we are holding b->irq_lock on a physical
engine.

v2: Drop mutex_reinit(), if we never mark the mutex as destroyed we
don't need to reset the debug code, at the loss of having the mutex
debug code spot us attempting to destroy a locked mutex.
v3: As the intended use will remain strongly referenced counted, with
very little inflight access across reuse, drop the ctor.
v4: Drop the unrequired change to remove the temporary reference around
dropping the active context, and add back some more missing ctor
operations.
v5: The ctor is back. Tvrtko spotted that ce->signal_lock [introduced
later] maybe accessed under RCU and so needs special care not to be
reinitialised.
v6: Don't mix SLAB_TYPESAFE_BY_RCU and RCU list iteration.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201126140407.31952-3-chris@chris-wilson.co.uk
(cherry picked from commit 14d1eaf08845c534963c83f754afe0cb14cb2512)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 12 +++++++++---
 drivers/gpu/drm/i915/gt/intel_context_types.h | 11 ++++++++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 92a3f25c4006..d3a835212167 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -25,9 +25,16 @@ static struct intel_context *intel_context_alloc(void)
 	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
 }
 
+static void rcu_context_free(struct rcu_head *rcu)
+{
+	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
+
+	kmem_cache_free(global.slab_ce, ce);
+}
+
 void intel_context_free(struct intel_context *ce)
 {
-	kmem_cache_free(global.slab_ce, ce);
+	call_rcu(&ce->rcu, rcu_context_free);
 }
 
 struct intel_context *
@@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active)
 }
 
 void
-intel_context_init(struct intel_context *ce,
-		   struct intel_engine_cs *engine)
+intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!engine->cops);
 	GEM_BUG_ON(!engine->gt->vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 552cb57a2e8c..20cb5835d1c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -44,7 +44,16 @@ struct intel_context_ops {
 };
 
 struct intel_context {
-	struct kref ref;
+	/*
+	 * Note: Some fields may be accessed under RCU.
+	 *
+	 * Unless otherwise noted a field can safely be assumed to be protected
+	 * by strong reference counting.
+	 */
+	union {
+		struct kref ref; /* no kref_get_unless_zero()! */
+		struct rcu_head rcu;
+	};
 
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;

From 2bfdf302465a5eab941e551e2869a96bb473f66f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 26 Nov 2020 14:04:06 +0000
Subject: [PATCH 14/22] drm/i915/gt: Split the breadcrumb spinlock between
 global and contexts

As we funnel more and more contexts into the breadcrumbs on an engine,
the hold time of b->irq_lock grows. As we may then contend with the
b->irq_lock during request submission, this increases the burden upon
the engine->active.lock and so directly impacts both our execution
latency and client latency. If we split the b->irq_lock by introducing a
per-context spinlock to manage the signalers within a context, we then
only need the b->irq_lock for enabling/disabling the interrupt and can
avoid taking the lock for walking the list of contexts within the signal
worker. Even with the current setup, this greatly reduces the number of
times we have to take and fight for b->irq_lock.

Furthermore, this closes the race between enabling the signaling context
while it is in the process of being signaled and removed:

<4>[  416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870).
<4>[  416.208573] WARNING: CPU: 7 PID: 0 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70
<4>[  416.208575] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
<4>[  416.208611] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G     U            5.8.0-CI-CI_DRM_8852+ #1
<4>[  416.208614] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
<4>[  416.208627] RIP: 0010:__list_add_valid+0x4d/0x70
<4>[  416.208631] Code: c3 48 89 d1 48 c7 c7 60 18 33 82 48 89 c2 e8 ea e0 b6 ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 b0 18 33 82 e8 d3 e0 b6 ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 00 19 33 82 e8
<4>[  416.208633] RSP: 0018:ffffc90000280e18 EFLAGS: 00010086
<4>[  416.208636] RAX: 0000000000000000 RBX: ffff888250a44880 RCX: 0000000000000105
<4>[  416.208639] RDX: 0000000000000105 RSI: ffffffff82320c5b RDI: 00000000ffffffff
<4>[  416.208641] RBP: ffff8882781bb870 R08: 0000000000000000 R09: 0000000000000001
<4>[  416.208643] R10: 00000000054d2957 R11: 000000006abbd991 R12: ffff8881951d58c8
<4>[  416.208646] R13: ffff888286073880 R14: ffff888286073848 R15: ffff8881951d5910
<4>[  416.208669] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
<4>[  416.208671] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  416.208673] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
<4>[  416.208675] PKRU: 55555554
<4>[  416.208677] Call Trace:
<4>[  416.208679]  <IRQ>
<4>[  416.208751]  i915_request_enable_breadcrumb+0x278/0x400 [i915]
<4>[  416.208839]  __i915_request_submit+0xca/0x2a0 [i915]
<4>[  416.208892]  __execlists_submission_tasklet+0x480/0x1830 [i915]
<4>[  416.208942]  execlists_submission_tasklet+0xc4/0x130 [i915]
<4>[  416.208947]  tasklet_action_common.isra.17+0x6c/0x1c0
<4>[  416.208954]  __do_softirq+0xdf/0x498
<4>[  416.208960]  ? handle_fasteoi_irq+0x150/0x150
<4>[  416.208964]  asm_call_on_stack+0xf/0x20
<4>[  416.208966]  </IRQ>
<4>[  416.208969]  do_softirq_own_stack+0xa1/0xc0
<4>[  416.208972]  irq_exit_rcu+0xb5/0xc0
<4>[  416.208976]  common_interrupt+0xf7/0x260
<4>[  416.208980]  asm_common_interrupt+0x1e/0x40
<4>[  416.208985] RIP: 0010:cpuidle_enter_state+0xb6/0x410
<4>[  416.208987] Code: 00 31 ff e8 9c 3e 89 ff 80 7c 24 0b 00 74 12 9c 58 f6 c4 02 0f 85 31 03 00 00 31 ff e8 e3 6c 90 ff e8 fe a4 94 ff fb 45 85 ed <0f> 88 c7 02 00 00 49 63 c5 4c 2b 24 24 48 8d 14 40 48 8d 14 90 48
<4>[  416.208989] RSP: 0018:ffffc90000143e70 EFLAGS: 00000206
<4>[  416.208991] RAX: 0000000000000007 RBX: ffffe8ffffda8070 RCX: 0000000000000000
<4>[  416.208993] RDX: 0000000000000000 RSI: ffffffff8238b4ee RDI: ffffffff8233184f
<4>[  416.208995] RBP: ffffffff826b4e00 R08: 0000000000000000 R09: 0000000000000000
<4>[  416.208997] R10: 0000000000000001 R11: 0000000000000000 R12: 00000060e7f24a8f
<4>[  416.208998] R13: 0000000000000003 R14: 0000000000000003 R15: 0000000000000003
<4>[  416.209012]  cpuidle_enter+0x24/0x40
<4>[  416.209016]  do_idle+0x22f/0x2d0
<4>[  416.209022]  cpu_startup_entry+0x14/0x20
<4>[  416.209025]  start_secondary+0x158/0x1a0
<4>[  416.209030]  secondary_startup_64+0xa4/0xb0
<4>[  416.209039] irq event stamp: 10186977
<4>[  416.209042] hardirqs last  enabled at (10186976): [<ffffffff810b9363>] tasklet_action_common.isra.17+0xe3/0x1c0
<4>[  416.209044] hardirqs last disabled at (10186977): [<ffffffff81a5e5ed>] _raw_spin_lock_irqsave+0xd/0x50
<4>[  416.209047] softirqs last  enabled at (10186968): [<ffffffff810b9a1a>] irq_enter_rcu+0x6a/0x70
<4>[  416.209049] softirqs last disabled at (10186969): [<ffffffff81c00f4f>] asm_call_on_stack+0xf/0x20

<4>[  416.209317] list_del corruption, ffff8882781bb870->next is LIST_POISON1 (dead000000000100)
<4>[  416.209317] WARNING: CPU: 7 PID: 46 at lib/list_debug.c:47 __list_del_entry_valid+0x4e/0x90
<4>[  416.209317] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
<4>[  416.209317] CPU: 7 PID: 46 Comm: ksoftirqd/7 Tainted: G     U  W         5.8.0-CI-CI_DRM_8852+ #1
<4>[  416.209317] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
<4>[  416.209317] RIP: 0010:__list_del_entry_valid+0x4e/0x90
<4>[  416.209317] Code: 2e 48 8b 32 48 39 fe 75 3a 48 8b 50 08 48 39 f2 75 48 b8 01 00 00 00 c3 48 89 fe 48 89 c2 48 c7 c7 38 19 33 82 e8 62 e0 b6 ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 19 33 82 e8 4e e0 b6 ff 0f 0b
<4>[  416.209317] RSP: 0018:ffffc90000280de8 EFLAGS: 00010086
<4>[  416.209317] RAX: 0000000000000000 RBX: ffff8882781bb848 RCX: 0000000000010104
<4>[  416.209317] RDX: 0000000000010104 RSI: ffffffff8238b4ee RDI: 00000000ffffffff
<4>[  416.209317] RBP: ffff8882781bb880 R08: 0000000000000000 R09: 0000000000000001
<4>[  416.209317] R10: 000000009fb6666e R11: 00000000feca9427 R12: ffffc90000280e18
<4>[  416.209317] R13: ffff8881951d5930 R14: dead0000000000d8 R15: ffff8882781bb880
<4>[  416.209317] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
<4>[  416.209317] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  416.209317] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
<4>[  416.209317] PKRU: 55555554
<4>[  416.209317] Call Trace:
<4>[  416.209317]  <IRQ>
<4>[  416.209317]  remove_signaling_context.isra.13+0xd/0x70 [i915]
<4>[  416.209513]  signal_irq_work+0x1f7/0x4b0 [i915]

This is caused by virtual engines where although we take the breadcrumb
lock on each of the active engines, they may be different engines on
different requests, It turns out that the b->irq_lock was not a
sufficient proxy for the engine->active.lock in the case of more than
one request, so introduce an explicit lock around ce->signals.

v2: ce->signal_lock is acquired with only RCU protection and so must be
treated carefully and not cleared during reallocation. We also then need
to confirm that the ce we lock is the same as we found in the breadcrumb
list.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2276
Fixes: c18636f76344 ("drm/i915: Remove requirement for holding i915_request.lock for breadcrumbs")
Fixes: 2854d866327a ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201126140407.31952-4-chris@chris-wilson.co.uk
(cherry picked from commit c744d50363b714783bbc88d986cc16def13710f7)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 168 ++++++++----------
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |   6 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   3 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  12 +-
 drivers/gpu/drm/i915/i915_request.h           |   6 +-
 5 files changed, 90 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cf6e05ea4d8f..a24cc1ff08a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -101,18 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 	intel_gt_pm_put_async(b->irq_engine->gt);
 }
 
+static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+	spin_lock(&b->irq_lock);
+	if (b->irq_armed)
+		__intel_breadcrumbs_disarm_irq(b);
+	spin_unlock(&b->irq_lock);
+}
+
 static void add_signaling_context(struct intel_breadcrumbs *b,
 				  struct intel_context *ce)
 {
-	intel_context_get(ce);
-	list_add_tail(&ce->signal_link, &b->signalers);
+	lockdep_assert_held(&ce->signal_lock);
+
+	spin_lock(&b->signalers_lock);
+	list_add_rcu(&ce->signal_link, &b->signalers);
+	spin_unlock(&b->signalers_lock);
 }
 
-static void remove_signaling_context(struct intel_breadcrumbs *b,
+static bool remove_signaling_context(struct intel_breadcrumbs *b,
 				     struct intel_context *ce)
 {
-	list_del(&ce->signal_link);
-	intel_context_put(ce);
+	lockdep_assert_held(&ce->signal_lock);
+
+	if (!list_empty(&ce->signals))
+		return false;
+
+	spin_lock(&b->signalers_lock);
+	list_del_rcu(&ce->signal_link);
+	spin_unlock(&b->signalers_lock);
+
+	return true;
 }
 
 static inline bool __request_completed(const struct i915_request *rq)
@@ -175,6 +194,8 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 
 static bool __signal_request(struct i915_request *rq)
 {
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
 	if (!__dma_fence_signal(&rq->fence)) {
 		i915_request_put(rq);
 		return false;
@@ -195,15 +216,12 @@ static void signal_irq_work(struct irq_work *work)
 	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
 	struct llist_node *signal, *sn;
-	struct intel_context *ce, *cn;
-	struct list_head *pos, *next;
+	struct intel_context *ce;
 
 	signal = NULL;
 	if (unlikely(!llist_empty(&b->signaled_requests)))
 		signal = llist_del_all(&b->signaled_requests);
 
-	spin_lock(&b->irq_lock);
-
 	/*
 	 * Keep the irq armed until the interrupt after all listeners are gone.
 	 *
@@ -229,47 +247,44 @@ static void signal_irq_work(struct irq_work *work)
 	 * interrupt draw less ire from other users of the system and tools
 	 * like powertop.
 	 */
-	if (!signal && b->irq_armed && list_empty(&b->signalers))
-		__intel_breadcrumbs_disarm_irq(b);
+	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
+		intel_breadcrumbs_disarm_irq(b);
 
-	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
-		GEM_BUG_ON(list_empty(&ce->signals));
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		struct i915_request *rq;
 
-		list_for_each_safe(pos, next, &ce->signals) {
-			struct i915_request *rq =
-				list_entry(pos, typeof(*rq), signal_link);
+		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
+			bool release;
 
-			GEM_BUG_ON(!check_signal_order(ce, rq));
 			if (!__request_completed(rq))
 				break;
 
+			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+						&rq->fence.flags))
+				break;
+
 			/*
 			 * Queue for execution after dropping the signaling
 			 * spinlock as the callback chain may end up adding
 			 * more signalers to the same context or engine.
 			 */
-			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+			spin_lock(&ce->signal_lock);
+			list_del_rcu(&rq->signal_link);
+			release = remove_signaling_context(b, ce);
+			spin_unlock(&ce->signal_lock);
+
 			if (__signal_request(rq))
 				/* We own signal_node now, xfer to local list */
 				signal = slist_add(&rq->signal_node, signal);
-		}
 
-		/*
-		 * We process the list deletion in bulk, only using a list_add
-		 * (not list_move) above but keeping the status of
-		 * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
-		 */
-		if (!list_is_first(pos, &ce->signals)) {
-			/* Advance the list to the first incomplete request */
-			__list_del_many(&ce->signals, pos);
-			if (&ce->signals == pos) { /* now empty */
+			if (release) {
 				add_retire(b, ce->timeline);
-				remove_signaling_context(b, ce);
+				intel_context_put(ce);
 			}
 		}
 	}
-
-	spin_unlock(&b->irq_lock);
+	rcu_read_unlock();
 
 	llist_for_each_safe(signal, sn, signal) {
 		struct i915_request *rq =
@@ -298,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 	if (!b)
 		return NULL;
 
-	spin_lock_init(&b->irq_lock);
+	b->irq_engine = irq_engine;
+
+	spin_lock_init(&b->signalers_lock);
 	INIT_LIST_HEAD(&b->signalers);
 	init_llist_head(&b->signaled_requests);
 
+	spin_lock_init(&b->irq_lock);
 	init_irq_work(&b->irq_work, signal_irq_work);
 
-	b->irq_engine = irq_engine;
-
 	return b;
 }
 
@@ -347,9 +363,9 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 	kfree(b);
 }
 
-static void insert_breadcrumb(struct i915_request *rq,
-			      struct intel_breadcrumbs *b)
+static void insert_breadcrumb(struct i915_request *rq)
 {
+	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
 	struct intel_context *ce = rq->context;
 	struct list_head *pos;
 
@@ -371,6 +387,7 @@ static void insert_breadcrumb(struct i915_request *rq,
 	}
 
 	if (list_empty(&ce->signals)) {
+		intel_context_get(ce);
 		add_signaling_context(b, ce);
 		pos = &ce->signals;
 	} else {
@@ -396,8 +413,9 @@ static void insert_breadcrumb(struct i915_request *rq,
 				break;
 		}
 	}
-	list_add(&rq->signal_link, pos);
+	list_add_rcu(&rq->signal_link, pos);
 	GEM_BUG_ON(!check_signal_order(ce, rq));
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
 	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
 	/*
@@ -410,7 +428,7 @@ static void insert_breadcrumb(struct i915_request *rq,
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b;
+	struct intel_context *ce = rq->context;
 
 	/* Serialises with i915_request_retire() using rq->lock */
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -425,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
 		return true;
 
-	/*
-	 * rq->engine is locked by rq->engine->active.lock. That however
-	 * is not known until after rq->engine has been dereferenced and
-	 * the lock acquired. Hence we acquire the lock and then validate
-	 * that rq->engine still matches the lock we hold for it.
-	 *
-	 * Here, we are using the breadcrumb lock as a proxy for the
-	 * rq->engine->active.lock, and we know that since the breadcrumb
-	 * will be serialised within i915_request_submit/i915_request_unsubmit,
-	 * the engine cannot change while active as long as we hold the
-	 * breadcrumb lock on that engine.
-	 *
-	 * From the dma_fence_enable_signaling() path, we are outside of the
-	 * request submit/unsubmit path, and so we must be more careful to
-	 * acquire the right lock.
-	 */
-	b = READ_ONCE(rq->engine)->breadcrumbs;
-	spin_lock(&b->irq_lock);
-	while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
-		spin_unlock(&b->irq_lock);
-		b = READ_ONCE(rq->engine)->breadcrumbs;
-		spin_lock(&b->irq_lock);
-	}
-
-	/*
-	 * Now that we are finally serialised with request submit/unsubmit,
-	 * [with b->irq_lock] and with i915_request_retire() [via checking
-	 * SIGNALED with rq->lock] confirm the request is indeed active. If
-	 * it is no longer active, the breadcrumb will be attached upon
-	 * i915_request_submit().
-	 */
+	spin_lock(&ce->signal_lock);
 	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		insert_breadcrumb(rq, b);
-
-	spin_unlock(&b->irq_lock);
+		insert_breadcrumb(rq);
+	spin_unlock(&ce->signal_lock);
 
 	return true;
 }
 
 void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
+	struct intel_context *ce = rq->context;
+	bool release;
 
-	/*
-	 * We must wait for b->irq_lock so that we know the interrupt handler
-	 * has released its reference to the intel_context and has completed
-	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
-	 * required).
-	 */
-	spin_lock(&b->irq_lock);
-	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-		struct intel_context *ce = rq->context;
+	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+		return;
 
-		list_del(&rq->signal_link);
-		if (list_empty(&ce->signals))
-			remove_signaling_context(b, ce);
+	spin_lock(&ce->signal_lock);
+	list_del_rcu(&rq->signal_link);
+	release = remove_signaling_context(rq->engine->breadcrumbs, ce);
+	spin_unlock(&ce->signal_lock);
+	if (release)
+		intel_context_put(ce);
 
-		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-		i915_request_put(rq);
-	}
-	spin_unlock(&b->irq_lock);
+	i915_request_put(rq);
 }
 
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -495,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 
 	drm_printf(p, "Signals:\n");
 
-	spin_lock_irq(&b->irq_lock);
-	list_for_each_entry(ce, &b->signalers, signal_link) {
-		list_for_each_entry(rq, &ce->signals, signal_link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
 			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
 				   rq->fence.context, rq->fence.seqno,
 				   i915_request_completed(rq) ? "!" :
 				   i915_request_started(rq) ? "*" :
 				   "",
 				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
-		}
 	}
-	spin_unlock_irq(&b->irq_lock);
+	rcu_read_unlock();
 }
 
 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 3fa19820b37a..a74bb3062bd8 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -29,18 +29,16 @@
  * the overhead of waking that client is much preferred.
  */
 struct intel_breadcrumbs {
-	spinlock_t irq_lock; /* protects the lists used in hardirq context */
-
 	/* Not all breadcrumbs are attached to physical HW */
 	struct intel_engine_cs *irq_engine;
 
+	spinlock_t signalers_lock; /* protects the list of signalers */
 	struct list_head signalers;
 	struct llist_head signaled_requests;
 
+	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
-
 	unsigned int irq_enabled;
-
 	bool irq_armed;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index d3a835212167..349e7fa1488d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -379,7 +379,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 
 	ce->vm = i915_vm_get(engine->gt->vm);
 
-	INIT_LIST_HEAD(&ce->signal_link);
+	/* NB ce->signal_link/lock is used under RCU */
+	spin_lock_init(&ce->signal_lock);
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 20cb5835d1c3..52fa9c132746 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8);
 struct i915_gem_context;
 struct i915_gem_ww_ctx;
 struct i915_vma;
+struct intel_breadcrumbs;
 struct intel_context;
 struct intel_ring;
 
@@ -63,8 +64,15 @@ struct intel_context {
 	struct i915_address_space *vm;
 	struct i915_gem_context __rcu *gem_context;
 
-	struct list_head signal_link;
-	struct list_head signals;
+	/*
+	 * @signal_lock protects the list of requests that need signaling,
+	 * @signals. While there are any requests that need signaling,
+	 * we add the context to the breadcrumbs worker, and remove it
+	 * upon completion/cancellation of the last request.
+	 */
+	struct list_head signal_link; /* Accessed under RCU */
+	struct list_head signals; /* Guarded by signal_lock */
+	spinlock_t signal_lock; /* protects signals, the list of requests */
 
 	struct i915_vma *state;
 	struct intel_ring *ring;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 874af6db6103..620b6fab2c5c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -177,10 +177,8 @@ struct i915_request {
 	struct intel_ring *ring;
 	struct intel_timeline __rcu *timeline;
 
-	union {
-		struct list_head signal_link;
-		struct llist_node signal_node;
-	};
+	struct list_head signal_link;
+	struct llist_node signal_node;
 
 	/*
 	 * The rcu epoch of when this request was allocated. Used to judiciously

From 78b2eb8a1f10f366681acad8d21c974c1f66791a Mon Sep 17 00:00:00 2001
From: Venkata Ramana Nayana <venkata.ramana.nayana@intel.com>
Date: Fri, 27 Nov 2020 12:07:16 +0000
Subject: [PATCH 15/22] drm/i915/gt: Retain default context state across
 shrinking

As we use a shmemfs file to hold the context state, when not in use it
may be swapped out, such as across suspend. Since we wrote into the
shmemfs without marking the pages as dirty, the contents may be dropped
instead of being written back to swap. On re-using the shmemfs file,
such as creating a new context after resume, the contents of that file
were likely garbage and so the new context could then hang the GPU.

Simply mark the page as being written when copying into the shmemfs
file, and it the new contents will be retained across swapout.

Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state")
Cc: Sudeep Dutt <sudeep.dutt@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Venkata Ramana Nayana <venkata.ramana.nayana@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: <stable@vger.kernel.org> # v5.8+
Link: https://patchwork.freedesktop.org/patch/msgid/20201127120718.454037-161-matthew.auld@intel.com
(cherry picked from commit a9d71f76ccfd309f3bd5f7c9b60e91a4decae792)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/shmem_utils.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index f011ea42487e..463af675fadd 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off,
 			return PTR_ERR(page);
 
 		vaddr = kmap(page);
-		if (write)
+		if (write) {
 			memcpy(vaddr + offset_in_page(off), ptr, this);
-		else
+			set_page_dirty(page);
+		} else {
 			memcpy(ptr, vaddr + offset_in_page(off), this);
+		}
+		mark_page_accessed(page);
 		kunmap(page);
 		put_page(page);
 

From aff76ab795364569b1cac58c1d0bc7df956e3899 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 24 Nov 2020 18:35:21 +0000
Subject: [PATCH 16/22] drm/i915/gt: Limit frequency drop to RPe on parking

We treat idling the GT (intel_rps_park) as a downclock event, and reduce
the frequency we intend to restart the GT with. Since the two workloads
are likely related (e.g. a compositor rendering every 16ms), we want to
carry the frequency and load information from across the idling.
However, we do also need to update the frequencies so that workloads
that run for less than 1ms are autotuned by RPS (otherwise we leave
compositors running at max clocks, draining excess power). Conversely,
if we try to run too slowly, the next workload has to run longer. Since
there is a hysteresis in the power graph, below a certain frequency
running a short workload for longer consumes more energy than running it
slightly higher for less time. The exact balance point is unknown
beforehand, but measurements with 30fps media playback indicate that RPe
is a better choice.

Reported-by: Edward Baker <edward.baker@intel.com>
Tested-by: Edward Baker <edward.baker@intel.com>
Fixes: 043cd2d14ede ("drm/i915/gt: Leave rps->cur_freq on unpark")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Edward Baker <edward.baker@intel.com>
Cc: Andi Shyti <andi.shyti@intel.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: <stable@vger.kernel.org> # v5.8+
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201124183521.28623-1-chris@chris-wilson.co.uk
(cherry picked from commit f7ed83cc1925f0b8ce2515044d674354035c3af9)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index e6a00eea0631..c1c9cc0ad3b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -883,6 +883,10 @@ void intel_rps_park(struct intel_rps *rps)
 		adj = -2;
 	rps->last_adj = adj;
 	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+	if (rps->cur_freq < rps->efficient_freq) {
+		rps->cur_freq = rps->efficient_freq;
+		rps->last_adj = 0;
+	}
 
 	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }

From 37eade64eb11c6d548c9a7030ccc655decfb8fa0 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 6 Nov 2020 14:55:27 -0800
Subject: [PATCH 17/22] drm/i915/display: return earlier from
 intel_modeset_init() without display
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

!HAS_DISPLAY() implies !HAS_OVERLAY(), skipping overlay setup anyway, so
return earlier from intel_modeset_init() for clarity.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201106225531.920641-4-lucas.demarchi@intel.com
(cherry picked from commit 71c8415d0daa78ef1295743d0e11ba0214d0a9b9)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 99e682563d47..c0d920f596f4 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -18039,11 +18039,11 @@ int intel_modeset_init(struct drm_i915_private *i915)
 {
 	int ret;
 
-	intel_overlay_setup(i915);
-
 	if (!HAS_DISPLAY(i915))
 		return 0;
 
+	intel_overlay_setup(i915);
+
 	ret = intel_fbdev_init(&i915->drm);
 	if (ret)
 		return ret;

From ccc9e67ab26feda7e62749bb54c05d7abe07dca9 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 25 Nov 2020 19:30:32 +0000
Subject: [PATCH 18/22] drm/i915/display: Defer initial modeset until after
 GGTT is initialised
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prior to sanitizing the GGTT, the only operations allowed in
intel_display_init_nogem() are those to reserve the preallocated (and
active) regions in the GGTT leftover from the BIOS. Trying to allocate a
GGTT vma (such as intel_pin_and_fence_fb_obj during the initial modeset)
may then conflict with other preallocated regions that have not yet been
protected.

Move the initial modesetting from the end of init_nogem to the beginning
of init so that any vma pinning (either framebuffers or DSB, for example),
is after the GGTT is ready to handle it.

This will prevent the DSB object from being destroyed too early:

[   53.449241] BUG: KASAN: use-after-free in i915_init_ggtt+0x324/0x9e0 [i915]
[   53.449309] Read of size 8 at addr ffff88811b1e8070 by task systemd-udevd/345

[   53.449399] CPU: 1 PID: 345 Comm: systemd-udevd Tainted: G        W         5.10.0-rc5+ #12
[   53.449409] Call Trace:
[   53.449418]  dump_stack+0x9a/0xcc
[   53.449558]  ? i915_init_ggtt+0x324/0x9e0 [i915]
[   53.449565]  print_address_description.constprop.0+0x3e/0x60
[   53.449577]  ? _raw_spin_lock_irqsave+0x4e/0x50
[   53.449718]  ? i915_init_ggtt+0x324/0x9e0 [i915]
[   53.449849]  ? i915_init_ggtt+0x324/0x9e0 [i915]
[   53.449857]  kasan_report.cold+0x1f/0x37
[   53.449993]  ? i915_init_ggtt+0x324/0x9e0 [i915]
[   53.450130]  i915_init_ggtt+0x324/0x9e0 [i915]
[   53.450273]  ? i915_ggtt_suspend+0x1f0/0x1f0 [i915]
[   53.450281]  ? static_obj+0x69/0x80
[   53.450289]  ? lockdep_init_map_waits+0xa9/0x310
[   53.450431]  ? intel_wopcm_init+0x96/0x3d0 [i915]
[   53.450581]  ? i915_gem_init+0x75/0x2d0 [i915]
[   53.450720]  i915_gem_init+0x75/0x2d0 [i915]
[   53.450852]  i915_driver_probe+0x8c2/0x1210 [i915]
[   53.450993]  ? i915_pm_prepare+0x630/0x630 [i915]
[   53.451006]  ? check_chain_key+0x1e7/0x2e0
[   53.451025]  ? __pm_runtime_resume+0x58/0xb0
[   53.451157]  i915_pci_probe+0xa6/0x2b0 [i915]
[   53.451285]  ? i915_pci_remove+0x40/0x40 [i915]
[   53.451295]  ? lockdep_hardirqs_on_prepare+0x124/0x230
[   53.451302]  ? _raw_spin_unlock_irqrestore+0x42/0x50
[   53.451309]  ? lockdep_hardirqs_on+0xbf/0x130
[   53.451315]  ? preempt_count_sub+0xf/0xb0
[   53.451321]  ? _raw_spin_unlock_irqrestore+0x2f/0x50
[   53.451335]  pci_device_probe+0xf9/0x190
[   53.451350]  really_probe+0x17f/0x5b0
[   53.451365]  driver_probe_device+0x13a/0x1c0
[   53.451376]  device_driver_attach+0x82/0x90
[   53.451386]  ? device_driver_attach+0x90/0x90
[   53.451391]  __driver_attach+0xab/0x190
[   53.451401]  ? device_driver_attach+0x90/0x90
[   53.451407]  bus_for_each_dev+0xe4/0x140
[   53.451414]  ? subsys_dev_iter_exit+0x10/0x10
[   53.451423]  ? __list_add_valid+0x2b/0xa0
[   53.451440]  bus_add_driver+0x227/0x2e0
[   53.451454]  driver_register+0xd3/0x150
[   53.451585]  i915_init+0x92/0xac [i915]
[   53.451592]  ? 0xffffffffa0a20000
[   53.451598]  do_one_initcall+0xb6/0x3b0
[   53.451606]  ? trace_event_raw_event_initcall_finish+0x150/0x150
[   53.451614]  ? __kasan_kmalloc.constprop.0+0xc2/0xd0
[   53.451627]  ? kmem_cache_alloc_trace+0x4a4/0x8e0
[   53.451634]  ? kasan_unpoison_shadow+0x33/0x40
[   53.451649]  do_init_module+0xf8/0x350
[   53.451662]  load_module+0x43de/0x47f0
[   53.451716]  ? module_frob_arch_sections+0x20/0x20
[   53.451731]  ? rw_verify_area+0x5f/0x130
[   53.451780]  ? __do_sys_finit_module+0x10d/0x1a0
[   53.451785]  __do_sys_finit_module+0x10d/0x1a0
[   53.451792]  ? __ia32_sys_init_module+0x40/0x40
[   53.451800]  ? seccomp_do_user_notification.isra.0+0x5c0/0x5c0
[   53.451829]  ? rcu_read_lock_bh_held+0xb0/0xb0
[   53.451835]  ? mark_held_locks+0x24/0x90
[   53.451856]  do_syscall_64+0x33/0x80
[   53.451863]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   53.451868] RIP: 0033:0x7fde09b4470d
[   53.451875] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 53 f7 0c 00 f7 d8 64 89 01 48
[   53.451880] RSP: 002b:00007ffd6abc1718 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[   53.451890] RAX: ffffffffffffffda RBX: 000056444e528150 RCX: 00007fde09b4470d
[   53.451895] RDX: 0000000000000000 RSI: 00007fde09a21ded RDI: 000000000000000f
[   53.451899] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000
[   53.451904] R10: 000000000000000f R11: 0000000000000246 R12: 00007fde09a21ded
[   53.451909] R13: 0000000000000000 R14: 000056444e329200 R15: 000056444e528150

[   53.451957] Allocated by task 345:
[   53.451995]  kasan_save_stack+0x1b/0x40
[   53.452001]  __kasan_kmalloc.constprop.0+0xc2/0xd0
[   53.452006]  kmem_cache_alloc+0x1cd/0x8d0
[   53.452146]  i915_vma_instance+0x126/0xb70 [i915]
[   53.452304]  i915_gem_object_ggtt_pin_ww+0x222/0x3f0 [i915]
[   53.452446]  intel_dsb_prepare+0x14f/0x230 [i915]
[   53.452588]  intel_atomic_commit+0x183/0x690 [i915]
[   53.452730]  intel_initial_commit+0x2bc/0x2f0 [i915]
[   53.452871]  intel_modeset_init_nogem+0xa02/0x2af0 [i915]
[   53.452995]  i915_driver_probe+0x8af/0x1210 [i915]
[   53.453120]  i915_pci_probe+0xa6/0x2b0 [i915]
[   53.453125]  pci_device_probe+0xf9/0x190
[   53.453131]  really_probe+0x17f/0x5b0
[   53.453136]  driver_probe_device+0x13a/0x1c0
[   53.453142]  device_driver_attach+0x82/0x90
[   53.453148]  __driver_attach+0xab/0x190
[   53.453153]  bus_for_each_dev+0xe4/0x140
[   53.453158]  bus_add_driver+0x227/0x2e0
[   53.453164]  driver_register+0xd3/0x150
[   53.453286]  i915_init+0x92/0xac [i915]
[   53.453292]  do_one_initcall+0xb6/0x3b0
[   53.453297]  do_init_module+0xf8/0x350
[   53.453302]  load_module+0x43de/0x47f0
[   53.453307]  __do_sys_finit_module+0x10d/0x1a0
[   53.453312]  do_syscall_64+0x33/0x80
[   53.453318]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[   53.453345] Freed by task 82:
[   53.453379]  kasan_save_stack+0x1b/0x40
[   53.453384]  kasan_set_track+0x1c/0x30
[   53.453389]  kasan_set_free_info+0x1b/0x30
[   53.453394]  __kasan_slab_free+0x112/0x160
[   53.453399]  kmem_cache_free+0xb2/0x3f0
[   53.453536]  i915_gem_flush_free_objects+0x31a/0x3b0 [i915]
[   53.453542]  process_one_work+0x519/0x9f0
[   53.453547]  worker_thread+0x75/0x5c0
[   53.453552]  kthread+0x1da/0x230
[   53.453557]  ret_from_fork+0x22/0x30

[   53.453584] The buggy address belongs to the object at ffff88811b1e8040
                which belongs to the cache i915_vma of size 968
[   53.453692] The buggy address is located 48 bytes inside of
                968-byte region [ffff88811b1e8040, ffff88811b1e8408)
[   53.453792] The buggy address belongs to the page:
[   53.453842] page:00000000b35f7048 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88811b1ef940 pfn:0x11b1e8
[   53.453847] head:00000000b35f7048 order:3 compound_mapcount:0 compound_pincount:0
[   53.453853] flags: 0x8000000000010200(slab|head)
[   53.453860] raw: 8000000000010200 ffff888115596248 ffff888115596248 ffff8881155b6340
[   53.453866] raw: ffff88811b1ef940 0000000000170001 00000001ffffffff 0000000000000000
[   53.453870] page dumped because: kasan: bad access detected

[   53.453895] Memory state around the buggy address:
[   53.453944]  ffff88811b1e7f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   53.454011]  ffff88811b1e7f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   53.454079] >ffff88811b1e8000: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb
[   53.454146]                                                              ^
[   53.454211]  ffff88811b1e8080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   53.454279]  ffff88811b1e8100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   53.454347] ==================================================================
[   53.454414] Disabling lock debugging due to kernel taint
[   53.454434] general protection fault, probably for non-canonical address 0xdead0000000000d0: 0000 [#1] PREEMPT SMP KASAN PTI
[   53.454446] CPU: 1 PID: 345 Comm: systemd-udevd Tainted: G    B   W         5.10.0-rc5+ #12
[   53.454592] RIP: 0010:i915_init_ggtt+0x26f/0x9e0 [i915]
[   53.454602] Code: 89 8d 48 ff ff ff 4c 8d 60 d0 49 39 c7 0f 84 37 02 00 00 4c 89 b5 40 ff ff ff 4d 8d bc 24 90 00 00 00 4c 89 ff e8 c1 97 f8 e0 <49> 83 bc 24 90 00 00 00 00 0f 84 0f 02 00 00 49 8d 7c 24 08 e8 a8
[   53.454618] RSP: 0018:ffff88812247f430 EFLAGS: 00010286
[   53.454625] RAX: 0000000000000000 RBX: ffff888136440000 RCX: ffffffffa03fb78f
[   53.454633] RDX: 0000000000000000 RSI: 0000000000000008 RDI: dead000000000160
[   53.454641] RBP: ffff88812247f500 R08: ffffffff8113589f R09: 0000000000000000
[   53.454648] R10: ffffffff83063843 R11: fffffbfff060c708 R12: dead0000000000d0
[   53.454656] R13: ffff888136449ba0 R14: 0000000000002000 R15: dead000000000160
[   53.454664] FS:  00007fde095c4880(0000) GS:ffff88840c880000(0000) knlGS:0000000000000000
[   53.454672] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   53.454679] CR2: 00007fef132b4f28 CR3: 000000012245c002 CR4: 00000000003706e0
[   53.454686] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   53.454693] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   53.454700] Call Trace:
[   53.454833]  ? i915_ggtt_suspend+0x1f0/0x1f0 [i915]

Reported-by: Matthew Auld <matthew.auld@intel.com>
Fixes: afeda4f3b1c8 ("drm/i915/dsb: Pre allocate and late cleanup of cmd buffer")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Tested-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201125193032.29282-1-chris@chris-wilson.co.uk
(cherry picked from commit b3bf99daaee96a141536ce5c60a0d6dba6ec1d23)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c0d920f596f4..3bfe6ed67da1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -18021,16 +18021,6 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915)
 	if (!HAS_GMCH(i915))
 		sanitize_watermarks(i915);
 
-	/*
-	 * Force all active planes to recompute their states. So that on
-	 * mode_setcrtc after probe, all the intel_plane_state variables
-	 * are already calculated and there is no assert_plane warnings
-	 * during bootup.
-	 */
-	ret = intel_initial_commit(dev);
-	if (ret)
-		drm_dbg_kms(&i915->drm, "Initial commit in probe failed.\n");
-
 	return 0;
 }
 
@@ -18042,6 +18032,16 @@ int intel_modeset_init(struct drm_i915_private *i915)
 	if (!HAS_DISPLAY(i915))
 		return 0;
 
+	/*
+	 * Force all active planes to recompute their states. So that on
+	 * mode_setcrtc after probe, all the intel_plane_state variables
+	 * are already calculated and there is no assert_plane warnings
+	 * during bootup.
+	 */
+	ret = intel_initial_commit(&i915->drm);
+	if (ret)
+		return ret;
+
 	intel_overlay_setup(i915);
 
 	ret = intel_fbdev_init(&i915->drm);

From acab02c1af43d3a9051524579b1c3dcfbfa5479d Mon Sep 17 00:00:00 2001
From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Date: Fri, 27 Nov 2020 21:40:24 +0530
Subject: [PATCH 19/22] drm/amdgpu/pm/smu11: Fix fan set speed bug

Fix fan set speed calculation.

Suggested-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 2380759ddf48..6db96fa1df09 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -1164,7 +1164,12 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
 	if (ret)
 		return ret;
 
-	crystal_clock_freq = amdgpu_asic_get_xclk(adev);
+	/*
+	 * crystal_clock_freq div by 4 is required since the fan control
+	 * module refers to 25MHz
+	 */
+
+	crystal_clock_freq = amdgpu_asic_get_xclk(adev) / 4;
 	tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
 	WREG32_SOC15(THM, 0, mmCG_TACH_CTRL,
 		     REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL),

From 7e0b367db85ef7b91399006253759a024eab7653 Mon Sep 17 00:00:00 2001
From: Brandon Syu <Brandon.Syu@amd.com>
Date: Thu, 12 Nov 2020 15:35:52 +0800
Subject: [PATCH 20/22] drm/amd/display: Init clock value by current vbios CLKs

[Why]
While booting into OS, driver updates DPP/DISP CLKs.
But init clock value is zero which is invalid.

[How]
Get current clocks value to update init clocks.
To avoid underflow.

Signed-off-by: Brandon Syu <Brandon.Syu@amd.com>
Reviewed-by: Tony Cheng <Tony.Cheng@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c   | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 2f8fee05547a..c001307b0a59 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -163,8 +163,17 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 			new_clocks->dppclk_khz = 100000;
 	}
 
-	if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
-		if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+	/*
+	 * Temporally ignore thew 0 cases for disp and dpp clks.
+	 * We may have a new feature that requires 0 clks in the future.
+	 */
+	if (new_clocks->dppclk_khz == 0 || new_clocks->dispclk_khz == 0) {
+		new_clocks->dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+		new_clocks->dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+	}
+
+	if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
+		if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
 			dpp_clock_lowered = true;
 		clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
 		update_dppclk = true;

From ac2db9488cf21de0be7899c1e5963e5ac0ff351f Mon Sep 17 00:00:00 2001
From: Boyuan Zhang <boyuan.zhang@amd.com>
Date: Sun, 10 May 2020 15:47:03 -0400
Subject: [PATCH 21/22] drm/amdgpu/vcn3.0: stall DPG when WPTR/RPTR reset

Port from VCN2.5
Add vcn dpg harware synchronization to fix race condition
issue between vcn driver and hardware.

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index e074f7ed388c..2aa84993f42d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1011,6 +1011,11 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
 
+	/* Stall DPG before WPTR/RPTR reset */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 	/* set the write pointer delay */
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
 
@@ -1033,6 +1038,10 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
 		lower_32_bits(ring->wptr));
 
+	/* Unstall DPG */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 	return 0;
 }
 
@@ -1556,8 +1565,14 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
 
+				/* Stall DPG before WPTR/RPTR reset */
+				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+					UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+					~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 				/* Restore */
 				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+				ring->wptr = 0;
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
@@ -1565,6 +1580,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
 
 				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+				ring->wptr = 0;
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
@@ -1574,6 +1590,10 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
 					RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
 
+				/* Unstall DPG */
+				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
 					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
 			}

From efd6d85a18102241538dd1cc257948a0dbe6fae6 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang <boyuan.zhang@amd.com>
Date: Tue, 19 May 2020 11:38:44 -0400
Subject: [PATCH 22/22] drm/amdgpu/vcn3.0: remove old DPG workaround

Port from VCN2.5
SCRATCH2 is used to keep decode wptr as a workaround
which fix a hardware DPG decode wptr update bug for
vcn2.5 beforehand.

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 2aa84993f42d..b5f8f3d731cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1587,9 +1587,6 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
 
-				WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
-					RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
-
 				/* Unstall DPG */
 				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
 					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
@@ -1650,10 +1647,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
-		WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
-			lower_32_bits(ring->wptr) | 0x80000000);
-
 	if (ring->use_doorbell) {
 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));