mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-22 19:05:24 -04:00
Using the DRM GPU scheduler infrastructure, with a scheduler for each core. Userspace can decide for a series of tasks to be executed sequentially in the same core, so SRAM locality can be taken advantage of. The job submission code was initially based on Panfrost. v2: - Remove hardcoded number of cores - Misc. style fixes (Jeffrey Hugo) - Repack IOCTL struct (Jeffrey Hugo) v3: - Adapt to a split of the register block in the DT bindings (Nicolas Frattaroli) - Make use of GPL-2.0-only for the copyright notice (Jeff Hugo) - Use drm_* logging functions (Thomas Zimmermann) - Rename reg i/o macros (Thomas Zimmermann) - Add padding to ioctls and check for zero (Jeff Hugo) - Improve error handling (Nicolas Frattaroli) v6: - Use mutexes guard (Markus Elfring) - Use u64_to_user_ptr (Jeff Hugo) - Drop rocket_fence (Rob Herring) v7: - Assign its own IOMMU domain to each client, for isolation (Daniel Stone and Robin Murphy) v8: - Use reset lines to reset the cores (Robin Murphy) - Use the macros to compute the values for the bitfields (Robin Murphy) - More descriptive name for the IRQ (Robin Murphy) - Simplify job interrupt handing (Robin Murphy) - Correctly acquire a reference to the IOMMU (Robin Murphy) - Specify the size of the embedded structs in the IOCTLs for future extensibility (Rob Herring) - Expose only 32 bits for the address of the regcmd BO (Robin Murphy) Tested-by: Heiko Stuebner <heiko@sntech.de> Reviewed-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com> Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net> Signed-off-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com> Link: https://lore.kernel.org/r/20250721-6-10-rocket-v9-4-77ebd484941e@tomeuvizoso.net
111 lines
2.9 KiB
C
111 lines
2.9 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
|
|
|
|
#include <linux/clk.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/dev_printk.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/err.h>
|
|
#include <linux/iommu.h>
|
|
#include <linux/platform_device.h>
|
|
#include <linux/pm_runtime.h>
|
|
#include <linux/reset.h>
|
|
|
|
#include "rocket_core.h"
|
|
#include "rocket_job.h"
|
|
|
|
int rocket_core_init(struct rocket_core *core)
|
|
{
|
|
struct device *dev = core->dev;
|
|
struct platform_device *pdev = to_platform_device(dev);
|
|
u32 version;
|
|
int err = 0;
|
|
|
|
core->resets[0].id = "srst_a";
|
|
core->resets[1].id = "srst_h";
|
|
err = devm_reset_control_bulk_get_exclusive(&pdev->dev, ARRAY_SIZE(core->resets),
|
|
core->resets);
|
|
if (err)
|
|
return dev_err_probe(dev, err, "failed to get resets for core %d\n", core->index);
|
|
|
|
err = devm_clk_bulk_get(dev, ARRAY_SIZE(core->clks), core->clks);
|
|
if (err)
|
|
return dev_err_probe(dev, err, "failed to get clocks for core %d\n", core->index);
|
|
|
|
core->pc_iomem = devm_platform_ioremap_resource_byname(pdev, "pc");
|
|
if (IS_ERR(core->pc_iomem)) {
|
|
dev_err(dev, "couldn't find PC registers %ld\n", PTR_ERR(core->pc_iomem));
|
|
return PTR_ERR(core->pc_iomem);
|
|
}
|
|
|
|
core->cna_iomem = devm_platform_ioremap_resource_byname(pdev, "cna");
|
|
if (IS_ERR(core->cna_iomem)) {
|
|
dev_err(dev, "couldn't find CNA registers %ld\n", PTR_ERR(core->cna_iomem));
|
|
return PTR_ERR(core->cna_iomem);
|
|
}
|
|
|
|
core->core_iomem = devm_platform_ioremap_resource_byname(pdev, "core");
|
|
if (IS_ERR(core->core_iomem)) {
|
|
dev_err(dev, "couldn't find CORE registers %ld\n", PTR_ERR(core->core_iomem));
|
|
return PTR_ERR(core->core_iomem);
|
|
}
|
|
|
|
dma_set_max_seg_size(dev, UINT_MAX);
|
|
|
|
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
|
|
if (err)
|
|
return err;
|
|
|
|
core->iommu_group = iommu_group_get(dev);
|
|
|
|
err = rocket_job_init(core);
|
|
if (err)
|
|
return err;
|
|
|
|
pm_runtime_use_autosuspend(dev);
|
|
|
|
/*
|
|
* As this NPU will be most often used as part of a media pipeline that
|
|
* ends presenting in a display, choose 50 ms (~3 frames at 60Hz) as an
|
|
* autosuspend delay as that will keep the device powered up while the
|
|
* pipeline is running.
|
|
*/
|
|
pm_runtime_set_autosuspend_delay(dev, 50);
|
|
|
|
pm_runtime_enable(dev);
|
|
|
|
err = pm_runtime_get_sync(dev);
|
|
if (err) {
|
|
rocket_job_fini(core);
|
|
return err;
|
|
}
|
|
|
|
version = rocket_pc_readl(core, VERSION);
|
|
version += rocket_pc_readl(core, VERSION_NUM) & 0xffff;
|
|
|
|
pm_runtime_mark_last_busy(dev);
|
|
pm_runtime_put_autosuspend(dev);
|
|
|
|
dev_info(dev, "Rockchip NPU core %d version: %d\n", core->index, version);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void rocket_core_fini(struct rocket_core *core)
|
|
{
|
|
pm_runtime_dont_use_autosuspend(core->dev);
|
|
pm_runtime_disable(core->dev);
|
|
iommu_group_put(core->iommu_group);
|
|
core->iommu_group = NULL;
|
|
rocket_job_fini(core);
|
|
}
|
|
|
|
void rocket_core_reset(struct rocket_core *core)
|
|
{
|
|
reset_control_bulk_assert(ARRAY_SIZE(core->resets), core->resets);
|
|
|
|
udelay(10);
|
|
|
|
reset_control_bulk_deassert(ARRAY_SIZE(core->resets), core->resets);
|
|
}
|