drm/msm/a6xx: Fix excessive stack usage

Clang-19 and above sometimes end up with multiple copies of the large
a6xx_hfi_msg_bw_table structure on the stack. The problem is that
a6xx_hfi_send_bw_table() calls a number of device specific functions to
fill the structure, but these create another copy of the structure on
the stack which gets copied to the first.

If the functions get inlined, that busts the warning limit:

drivers/gpu/drm/msm/adreno/a6xx_hfi.c:631:12: error: stack frame size (1032) exceeds limit (1024) in 'a6xx_hfi_send_bw_table' [-Werror,-Wframe-larger-than]

Fix this by kmalloc-ating struct a6xx_hfi_msg_bw_table instead of using
the stack. Also, use this opportunity to skip re-initializing this table
to optimize gpu wake up latency.

Cc: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Akhil P Oommen <quic_akhilpo@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/621814/
Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
Akhil P Oommen
2024-10-27 23:35:47 +05:30
committed by Rob Clark
parent 8f32ddd87e
commit d6d1ad32d0
2 changed files with 35 additions and 24 deletions

View File

@@ -99,6 +99,7 @@ struct a6xx_gmu {
struct completion pd_gate;
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)

View File

@@ -661,34 +661,44 @@ static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
{
struct a6xx_hfi_msg_bw_table msg = { 0 };
struct a6xx_hfi_msg_bw_table *msg;
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
if (adreno_is_a618(adreno_gpu))
a618_build_bw_table(&msg);
else if (adreno_is_a619(adreno_gpu))
a619_build_bw_table(&msg);
else if (adreno_is_a640_family(adreno_gpu))
a640_build_bw_table(&msg);
else if (adreno_is_a650(adreno_gpu))
a650_build_bw_table(&msg);
else if (adreno_is_7c3(adreno_gpu))
adreno_7c3_build_bw_table(&msg);
else if (adreno_is_a660(adreno_gpu))
a660_build_bw_table(&msg);
else if (adreno_is_a663(adreno_gpu))
a663_build_bw_table(&msg);
else if (adreno_is_a690(adreno_gpu))
a690_build_bw_table(&msg);
else if (adreno_is_a730(adreno_gpu))
a730_build_bw_table(&msg);
else if (adreno_is_a740_family(adreno_gpu))
a740_build_bw_table(&msg);
else
a6xx_build_bw_table(&msg);
if (gmu->bw_table)
goto send;
return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg),
msg = devm_kzalloc(gmu->dev, sizeof(*msg), GFP_KERNEL);
if (!msg)
return -ENOMEM;
if (adreno_is_a618(adreno_gpu))
a618_build_bw_table(msg);
else if (adreno_is_a619(adreno_gpu))
a619_build_bw_table(msg);
else if (adreno_is_a640_family(adreno_gpu))
a640_build_bw_table(msg);
else if (adreno_is_a650(adreno_gpu))
a650_build_bw_table(msg);
else if (adreno_is_7c3(adreno_gpu))
adreno_7c3_build_bw_table(msg);
else if (adreno_is_a660(adreno_gpu))
a660_build_bw_table(msg);
else if (adreno_is_a663(adreno_gpu))
a663_build_bw_table(msg);
else if (adreno_is_a690(adreno_gpu))
a690_build_bw_table(msg);
else if (adreno_is_a730(adreno_gpu))
a730_build_bw_table(msg);
else if (adreno_is_a740_family(adreno_gpu))
a740_build_bw_table(msg);
else
a6xx_build_bw_table(msg);
gmu->bw_table = msg;
send:
return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, gmu->bw_table, sizeof(*(gmu->bw_table)),
NULL, 0);
}