From fc782242749fa4235592854fafe1a1297583c1fb Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 13 Jan 2019 17:50:10 -0500 Subject: [PATCH 01/60] drm/nouveau/volt/gf117: fix speedo readout register GF117 appears to use the same register as GK104 (but still with the general Fermi readout mechanism). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108980 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/volt.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- .../gpu/drm/nouveau/nvkm/subdev/volt/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/volt/gf117.c | 60 +++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h index 8a0f85f5fc1a..6a765682fbfa 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h @@ -38,6 +38,7 @@ int nvkm_volt_set_id(struct nvkm_volt *, u8 id, u8 min_id, u8 temp, int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gf100_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gf117_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d9edb5785813..d75fa7678483 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1613,7 +1613,7 @@ nvd7_chipset = { .pci = gf106_pci_new, .therm = gf119_therm_new, .timer = nv41_timer_new, - .volt = gf100_volt_new, + .volt = gf117_volt_new, .ce[0] = gf100_ce_new, .disp = gf119_disp_new, .dma = gf119_dma_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild index bcd179ba11d0..146adcdd316a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -2,6 +2,7 @@ nvkm-y += nvkm/subdev/volt/base.o nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gf100.o +nvkm-y += nvkm/subdev/volt/gf117.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c new file mode 100644 index 000000000000..547a58f0aeac --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c @@ -0,0 +1,60 @@ +/* + * Copyright 2019 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ilia Mirkin + */ +#include "priv.h" + +#include + +static int +gf117_volt_speedo_read(struct nvkm_volt *volt) +{ + struct nvkm_device *device = volt->subdev.device; + struct nvkm_fuse *fuse = device->fuse; + + if (!fuse) + return -EINVAL; + + return nvkm_fuse_read(fuse, 0x3a8); +} + +static const struct nvkm_volt_func +gf117_volt = { + .oneinit = gf100_volt_oneinit, + .vid_get = nvkm_voltgpio_get, + .vid_set = nvkm_voltgpio_set, + .speedo_read = gf117_volt_speedo_read, +}; + +int +gf117_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_volt *volt; + int ret; + + ret = nvkm_volt_new_(&gf117_volt, device, index, &volt); + *pvolt = volt; + if (ret) + return ret; + + return nvkm_voltgpio_init(volt); +} From b51f9dfac7195bd405efbc1acc66dca5196f424e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 11:41:53 +1000 Subject: [PATCH 02/60] drm/nouveau/devinit/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/devinit.h | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild | 2 +- .../nvkm/subdev/devinit/{tu104.c => tu102.c} | 14 +++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/subdev/devinit/{tu104.c => tu102.c} (87%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 1b71812a790b..8ba982c2fdfb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -31,5 +31,5 @@ int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); -int tu104_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d75fa7678483..325c4c0ba274 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2440,7 +2440,7 @@ nv162_chipset = { .bar = tu104_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, - .devinit = tu104_devinit_new, + .devinit = tu102_devinit_new, .fault = tu104_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, @@ -2472,7 +2472,7 @@ nv164_chipset = { .bar = tu104_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, - .devinit = tu104_devinit_new, + .devinit = tu102_devinit_new, .fault = tu104_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, @@ -2504,7 +2504,7 @@ nv166_chipset = { .bar = tu104_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, - .devinit = tu104_devinit_new, + .devinit = tu102_devinit_new, .fault = tu104_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index 3ef505a5c01b..f3c388932b6f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -13,4 +13,4 @@ nvkm-y += nvkm/subdev/devinit/gf100.o nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o nvkm-y += nvkm/subdev/devinit/gv100.o -nvkm-y += nvkm/subdev/devinit/tu104.o +nvkm-y += nvkm/subdev/devinit/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c similarity index 87% rename from drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c index aae87b3fc429..397670e72fff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c @@ -26,7 +26,7 @@ #include static int -tu104_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) { struct nvkm_subdev *subdev = &init->subdev; struct nvkm_device *device = subdev->device; @@ -66,7 +66,7 @@ tu104_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) } static int -tu104_devinit_post(struct nvkm_devinit *base, bool post) +tu102_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); gm200_devinit_preos(init, post); @@ -74,16 +74,16 @@ tu104_devinit_post(struct nvkm_devinit *base, bool post) } static const struct nvkm_devinit_func -tu104_devinit = { +tu102_devinit = { .init = nv50_devinit_init, - .post = tu104_devinit_post, - .pll_set = tu104_devinit_pll_set, + .post = tu102_devinit_post, + .pll_set = tu102_devinit_pll_set, .disable = gm107_devinit_disable, }; int -tu104_devinit_new(struct nvkm_device *device, int index, +tu102_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit) { - return nv50_devinit_new_(&tu104_devinit, device, index, pinit); + return nv50_devinit_new_(&tu102_devinit, device, index, pinit); } From fd95bfbdb9952dbf13b30e964934876bc9e33551 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 11:46:39 +1000 Subject: [PATCH 03/60] drm/nouveau/mc/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild | 2 +- .../drm/nouveau/nvkm/subdev/mc/{tu104.c => tu102.c} | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/subdev/mc/{tu104.c => tu102.c} (88%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index b66dedd8abb6..e38f4958dea2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -31,5 +31,5 @@ int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **); -int tu104_mc_new(struct nvkm_device *, int, struct nvkm_mc **); +int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 325c4c0ba274..bc4ddb7bb66b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2449,7 +2449,7 @@ nv162_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .ltc = gp102_ltc_new, - .mc = tu104_mc_new, + .mc = tu102_mc_new, .mmu = tu104_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2481,7 +2481,7 @@ nv164_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .ltc = gp102_ltc_new, - .mc = tu104_mc_new, + .mc = tu102_mc_new, .mmu = tu104_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, @@ -2513,7 +2513,7 @@ nv166_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .ltc = gp102_ltc_new, - .mc = tu104_mc_new, + .mc = tu102_mc_new, .mmu = tu104_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild index f3b06329c338..c64e399326b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild @@ -12,4 +12,4 @@ nvkm-y += nvkm/subdev/mc/gk104.o nvkm-y += nvkm/subdev/mc/gk20a.o nvkm-y += nvkm/subdev/mc/gp100.o nvkm-y += nvkm/subdev/mc/gp10b.o -nvkm-y += nvkm/subdev/mc/tu104.o +nvkm-y += nvkm/subdev/mc/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c similarity index 88% rename from drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c index b7165bd18999..d098c44a4fcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c @@ -22,7 +22,7 @@ #include "priv.h" static void -tu104_mc_intr_hack(struct nvkm_mc *mc, bool *handled) +tu102_mc_intr_hack(struct nvkm_mc *mc, bool *handled) { struct nvkm_device *device = mc->subdev.device; u32 stat = nvkm_rd32(device, 0xb81010); @@ -37,19 +37,19 @@ tu104_mc_intr_hack(struct nvkm_mc *mc, bool *handled) } static const struct nvkm_mc_func -tu104_mc = { +tu102_mc = { .init = nv50_mc_init, .intr = gp100_mc_intr, .intr_unarm = gp100_mc_intr_unarm, .intr_rearm = gp100_mc_intr_rearm, .intr_mask = gp100_mc_intr_mask, .intr_stat = gf100_mc_intr_stat, - .intr_hack = tu104_mc_intr_hack, + .intr_hack = tu102_mc_intr_hack, .reset = gk104_mc_reset, }; int -tu104_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) +tu102_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) { - return gp100_mc_new_(&tu104_mc, device, index, pmc); + return gp100_mc_new_(&tu102_mc, device, index, pmc); } From c011b25421f321b3692c3683fe2970aa6343f694 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:04:02 +1000 Subject: [PATCH 04/60] drm/nouveau/mmu/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild | 4 ++-- .../drm/nouveau/nvkm/subdev/mmu/{tu104.c => tu102.c} | 8 ++++---- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 2 +- .../nouveau/nvkm/subdev/mmu/{vmmtu104.c => vmmtu102.c} | 10 +++++----- 6 files changed, 16 insertions(+), 16 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/subdev/mmu/{tu104.c => tu102.c} (88%) rename drivers/gpu/drm/nouveau/nvkm/subdev/mmu/{vmmtu104.c => vmmtu102.c} (92%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 0a0e064f22e5..215a672f1cde 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -130,5 +130,5 @@ int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gv100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); -int tu104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int tu102_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bc4ddb7bb66b..5079bdff9187 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2450,7 +2450,7 @@ nv162_chipset = { .imem = nv50_instmem_new, .ltc = gp102_ltc_new, .mc = tu102_mc_new, - .mmu = tu104_mmu_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .therm = gp100_therm_new, @@ -2482,7 +2482,7 @@ nv164_chipset = { .imem = nv50_instmem_new, .ltc = gp102_ltc_new, .mc = tu102_mc_new, - .mmu = tu104_mmu_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .therm = gp100_therm_new, @@ -2514,7 +2514,7 @@ nv166_chipset = { .imem = nv50_instmem_new, .ltc = gp102_ltc_new, .mc = tu102_mc_new, - .mmu = tu104_mmu_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .therm = gp100_therm_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 8966180b36cc..db9c56028f21 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -13,7 +13,7 @@ nvkm-y += nvkm/subdev/mmu/gm20b.o nvkm-y += nvkm/subdev/mmu/gp100.o nvkm-y += nvkm/subdev/mmu/gp10b.o nvkm-y += nvkm/subdev/mmu/gv100.o -nvkm-y += nvkm/subdev/mmu/tu104.o +nvkm-y += nvkm/subdev/mmu/tu102.o nvkm-y += nvkm/subdev/mmu/mem.o nvkm-y += nvkm/subdev/mmu/memnv04.o @@ -34,7 +34,7 @@ nvkm-y += nvkm/subdev/mmu/vmmgm20b.o nvkm-y += nvkm/subdev/mmu/vmmgp100.o nvkm-y += nvkm/subdev/mmu/vmmgp10b.o nvkm-y += nvkm/subdev/mmu/vmmgv100.o -nvkm-y += nvkm/subdev/mmu/vmmtu104.o +nvkm-y += nvkm/subdev/mmu/vmmtu102.o nvkm-y += nvkm/subdev/mmu/umem.o nvkm-y += nvkm/subdev/mmu/ummu.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c similarity index 88% rename from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c index 8e6f4096170d..c0db0ce10cba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c @@ -27,17 +27,17 @@ #include static const struct nvkm_mmu_func -tu104_mmu = { +tu102_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, tu104_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, tu102_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; int -tu104_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +tu102_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) { - return nvkm_mmu_new_(&tu104_mmu, device, index, pmmu); + return nvkm_mmu_new_(&tu102_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 42ad326521a3..e6a02b568c1b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -242,7 +242,7 @@ int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int tu104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int tu102_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c similarity index 92% rename from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index adaadd92110f..56c630d141da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -24,7 +24,7 @@ #include static void -tu104_vmm_flush(struct nvkm_vmm *vmm, int depth) +tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; struct nvkm_device *device = subdev->device; @@ -50,12 +50,12 @@ tu104_vmm_flush(struct nvkm_vmm *vmm, int depth) } static const struct nvkm_vmm_func -tu104_vmm = { +tu102_vmm = { .join = gv100_vmm_join, .part = gf100_vmm_part, .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, - .flush = tu104_vmm_flush, + .flush = tu102_vmm_flush, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, @@ -68,10 +68,10 @@ tu104_vmm = { }; int -tu104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, +tu102_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&tu104_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&tu102_vmm, mmu, 0, addr, size, argv, argc, key, name, pvmm); } From ef7664d9dff9f2684c93c9adf2493081d8c9bc0a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:06:01 +1000 Subject: [PATCH 05/60] drm/nouveau/bar/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/bar.h | 2 +- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 6 ++-- .../gpu/drm/nouveau/nvkm/subdev/bar/Kbuild | 2 +- .../nvkm/subdev/bar/{tu104.c => tu102.c} | 30 +++++++++---------- 4 files changed, 20 insertions(+), 20 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/subdev/bar/{tu104.c => tu102.c} (84%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h index fd9d713b611c..da14486317ca 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h @@ -29,5 +29,5 @@ int gf100_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gk20a_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gm107_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gm20b_bar_new(struct nvkm_device *, int, struct nvkm_bar **); -int tu104_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int tu102_bar_new(struct nvkm_device *, int, struct nvkm_bar **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 5079bdff9187..f87b298b192f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2437,7 +2437,7 @@ nv140_chipset = { static const struct nvkm_device_chip nv162_chipset = { .name = "TU102", - .bar = tu104_bar_new, + .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, @@ -2469,7 +2469,7 @@ nv162_chipset = { static const struct nvkm_device_chip nv164_chipset = { .name = "TU104", - .bar = tu104_bar_new, + .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, @@ -2501,7 +2501,7 @@ nv164_chipset = { static const struct nvkm_device_chip nv166_chipset = { .name = "TU106", - .bar = tu104_bar_new, + .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild index ab0282dc0736..dc300600c019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild @@ -5,4 +5,4 @@ nvkm-y += nvkm/subdev/bar/gf100.o nvkm-y += nvkm/subdev/bar/gk20a.o nvkm-y += nvkm/subdev/bar/gm107.o nvkm-y += nvkm/subdev/bar/gm20b.o -nvkm-y += nvkm/subdev/bar/tu104.o +nvkm-y += nvkm/subdev/bar/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c similarity index 84% rename from drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c index ecaead156e9b..798f65ec3a86 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c @@ -25,7 +25,7 @@ #include static void -tu104_bar_bar2_wait(struct nvkm_bar *bar) +tu102_bar_bar2_wait(struct nvkm_bar *bar) { struct nvkm_device *device = bar->subdev.device; nvkm_msec(device, 2000, @@ -35,13 +35,13 @@ tu104_bar_bar2_wait(struct nvkm_bar *bar) } static void -tu104_bar_bar2_fini(struct nvkm_bar *bar) +tu102_bar_bar2_fini(struct nvkm_bar *bar) { nvkm_mask(bar->subdev.device, 0xb80f48, 0x80000000, 0x00000000); } static void -tu104_bar_bar2_init(struct nvkm_bar *base) +tu102_bar_bar2_init(struct nvkm_bar *base) { struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); @@ -52,7 +52,7 @@ tu104_bar_bar2_init(struct nvkm_bar *base) } static void -tu104_bar_bar1_wait(struct nvkm_bar *bar) +tu102_bar_bar1_wait(struct nvkm_bar *bar) { struct nvkm_device *device = bar->subdev.device; nvkm_msec(device, 2000, @@ -62,13 +62,13 @@ tu104_bar_bar1_wait(struct nvkm_bar *bar) } static void -tu104_bar_bar1_fini(struct nvkm_bar *bar) +tu102_bar_bar1_fini(struct nvkm_bar *bar) { nvkm_mask(bar->subdev.device, 0xb80f40, 0x80000000, 0x00000000); } static void -tu104_bar_bar1_init(struct nvkm_bar *base) +tu102_bar_bar1_init(struct nvkm_bar *base) { struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); @@ -77,22 +77,22 @@ tu104_bar_bar1_init(struct nvkm_bar *base) } static const struct nvkm_bar_func -tu104_bar = { +tu102_bar = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .bar1.init = tu104_bar_bar1_init, - .bar1.fini = tu104_bar_bar1_fini, - .bar1.wait = tu104_bar_bar1_wait, + .bar1.init = tu102_bar_bar1_init, + .bar1.fini = tu102_bar_bar1_fini, + .bar1.wait = tu102_bar_bar1_wait, .bar1.vmm = gf100_bar_bar1_vmm, - .bar2.init = tu104_bar_bar2_init, - .bar2.fini = tu104_bar_bar2_fini, - .bar2.wait = tu104_bar_bar2_wait, + .bar2.init = tu102_bar_bar2_init, + .bar2.fini = tu102_bar_bar2_fini, + .bar2.wait = tu102_bar_bar2_wait, .bar2.vmm = gf100_bar_bar2_vmm, .flush = g84_bar_flush, }; int -tu104_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +tu102_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) { - return gf100_bar_new_(&tu104_bar, device, index, pbar); + return gf100_bar_new_(&tu102_bar, device, index, pbar); } From 954f97983cb43afbbf994ac9fbc6a0983d2ed5bd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:07:23 +1000 Subject: [PATCH 06/60] drm/nouveau/fault/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/fault.h | 2 +- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +-- .../gpu/drm/nouveau/nvkm/subdev/fault/Kbuild | 2 +- .../nvkm/subdev/fault/{tu104.c => tu102.c} | 38 +++++++++---------- 4 files changed, 24 insertions(+), 24 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/subdev/fault/{tu104.c => tu102.c} (83%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h index 127f48066026..1cb465acbb4b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h @@ -30,5 +30,5 @@ struct nvkm_fault_data { int gp100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); int gv100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); -int tu104_fault_new(struct nvkm_device *, int, struct nvkm_fault **); +int tu102_fault_new(struct nvkm_device *, int, struct nvkm_fault **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index f87b298b192f..2f3d8da4f090 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2441,7 +2441,7 @@ nv162_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, - .fault = tu104_fault_new, + .fault = tu102_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, @@ -2473,7 +2473,7 @@ nv164_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, - .fault = tu104_fault_new, + .fault = tu102_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, @@ -2505,7 +2505,7 @@ nv166_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = tu102_devinit_new, - .fault = tu104_fault_new, + .fault = tu102_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild index 794eb1745b2f..801fd5c55945 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild @@ -1,4 +1,4 @@ nvkm-y += nvkm/subdev/fault/base.o nvkm-y += nvkm/subdev/fault/gp100.o nvkm-y += nvkm/subdev/fault/gv100.o -nvkm-y += nvkm/subdev/fault/tu104.o +nvkm-y += nvkm/subdev/fault/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c similarity index 83% rename from drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c index 9c8a3adf99d7..912425e6238d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c @@ -28,7 +28,7 @@ #include static void -tu104_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) +tu102_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) { /*XXX: Earlier versions of RM touched the old regs on Turing, * which don't appear to actually work anymore, but newer @@ -37,7 +37,7 @@ tu104_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) } static void -tu104_fault_buffer_fini(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_fini(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; const u32 foff = buffer->id * 0x20; @@ -45,7 +45,7 @@ tu104_fault_buffer_fini(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_buffer_init(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_init(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; const u32 foff = buffer->id * 0x20; @@ -57,7 +57,7 @@ tu104_fault_buffer_init(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_buffer_info(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_info(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; const u32 foff = buffer->id * 0x20; @@ -70,7 +70,7 @@ tu104_fault_buffer_info(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_intr_fault(struct nvkm_fault *fault) +tu102_fault_intr_fault(struct nvkm_fault *fault) { struct nvkm_subdev *subdev = &fault->subdev; struct nvkm_device *device = subdev->device; @@ -96,14 +96,14 @@ tu104_fault_intr_fault(struct nvkm_fault *fault) } static void -tu104_fault_intr(struct nvkm_fault *fault) +tu102_fault_intr(struct nvkm_fault *fault) { struct nvkm_subdev *subdev = &fault->subdev; struct nvkm_device *device = subdev->device; u32 stat = nvkm_rd32(device, 0xb83094); if (stat & 0x80000000) { - tu104_fault_intr_fault(fault); + tu102_fault_intr_fault(fault); nvkm_wr32(device, 0xb83094, 0x80000000); stat &= ~0x80000000; } @@ -129,7 +129,7 @@ tu104_fault_intr(struct nvkm_fault *fault) } static void -tu104_fault_fini(struct nvkm_fault *fault) +tu102_fault_fini(struct nvkm_fault *fault) { nvkm_notify_put(&fault->nrpfb); if (fault->buffer[0]) @@ -138,7 +138,7 @@ tu104_fault_fini(struct nvkm_fault *fault) } static void -tu104_fault_init(struct nvkm_fault *fault) +tu102_fault_init(struct nvkm_fault *fault) { /*XXX: enable priv faults */ fault->func->buffer.init(fault->buffer[0]); @@ -146,22 +146,22 @@ tu104_fault_init(struct nvkm_fault *fault) } static const struct nvkm_fault_func -tu104_fault = { +tu102_fault = { .oneinit = gv100_fault_oneinit, - .init = tu104_fault_init, - .fini = tu104_fault_fini, - .intr = tu104_fault_intr, + .init = tu102_fault_init, + .fini = tu102_fault_fini, + .intr = tu102_fault_intr, .buffer.nr = 2, .buffer.entry_size = 32, - .buffer.info = tu104_fault_buffer_info, - .buffer.init = tu104_fault_buffer_init, - .buffer.fini = tu104_fault_buffer_fini, - .buffer.intr = tu104_fault_buffer_intr, + .buffer.info = tu102_fault_buffer_info, + .buffer.init = tu102_fault_buffer_init, + .buffer.fini = tu102_fault_buffer_fini, + .buffer.intr = tu102_fault_buffer_intr, }; int -tu104_fault_new(struct nvkm_device *device, int index, +tu102_fault_new(struct nvkm_device *device, int index, struct nvkm_fault **pfault) { - return nvkm_fault_new_(&tu104_fault, device, index, pfault); + return nvkm_fault_new_(&tu102_fault, device, index, pfault); } From 8603774233507550d557272146ba1834071e3e49 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:10:06 +1000 Subject: [PATCH 07/60] drm/nouveau/disp/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/core.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/curs.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/wimm.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 2 +- drivers/gpu/drm/nouveau/include/nvif/class.h | 10 +++++----- .../drm/nouveau/include/nvkm/engine/disp.h | 2 +- drivers/gpu/drm/nouveau/nvif/disp.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +++--- .../gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 6 +++--- .../gpu/drm/nouveau/nvkm/engine/disp/ior.h | 2 +- .../drm/nouveau/nvkm/engine/disp/rootnv50.h | 2 +- .../engine/disp/{roottu104.c => roottu102.c} | 20 +++++++++---------- .../engine/disp/{sortu104.c => sortu102.c} | 14 ++++++------- .../nvkm/engine/disp/{tu104.c => tu102.c} | 14 ++++++------- 14 files changed, 43 insertions(+), 43 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/engine/disp/{roottu104.c => roottu102.c} (74%) rename drivers/gpu/drm/nouveau/nvkm/engine/disp/{sortu104.c => sortu102.c} (90%) rename drivers/gpu/drm/nouveau/nvkm/engine/disp/{tu104.c => tu102.c} (93%) diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c index c25e0ebe3c92..27ea3f34706d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -42,7 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) int version; int (*new)(struct nouveau_drm *, s32, struct nv50_core **); } cores[] = { - { TU104_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, + { TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, { GP100_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c index cb6e4d2b1b45..121c24a18f11 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -31,7 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) int version; int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); } curses[] = { - { TU104_DISP_CURSOR, 0, cursc37a_new }, + { TU102_DISP_CURSOR, 0, cursc37a_new }, { GV100_DISP_CURSOR, 0, cursc37a_new }, { GK104_DISP_CURSOR, 0, curs907a_new }, { GF110_DISP_CURSOR, 0, curs907a_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c index bc9eeaf212ae..a1ac153d5e98 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -31,7 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) int version; int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); } wimms[] = { - { TU104_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, + { TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, {} }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index ba9eea2ff16b..b95181027b31 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -626,7 +626,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, int (*new)(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); } wndws[] = { - { TU104_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, + { TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, {} }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 1d82cbf70cf4..214cb6ff93cd 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -84,7 +84,7 @@ #define GP100_DISP /* cl5070.h */ 0x00009770 #define GP102_DISP /* cl5070.h */ 0x00009870 #define GV100_DISP /* cl5070.h */ 0x0000c370 -#define TU104_DISP /* cl5070.h */ 0x0000c570 +#define TU102_DISP /* cl5070.h */ 0x0000c570 #define NV31_MPEG 0x00003174 #define G82_MPEG 0x00008274 @@ -97,7 +97,7 @@ #define GF110_DISP_CURSOR /* cl507a.h */ 0x0000907a #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a #define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a -#define TU104_DISP_CURSOR /* cl507a.h */ 0x0000c57a +#define TU102_DISP_CURSOR /* cl507a.h */ 0x0000c57a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -106,7 +106,7 @@ #define GK104_DISP_OVERLAY /* cl507b.h */ 0x0000917b #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b -#define TU104_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b +#define TU102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c @@ -129,7 +129,7 @@ #define GP100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000977d #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d #define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d -#define TU104_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d +#define TU102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -139,7 +139,7 @@ #define GK104_DISP_OVERLAY_CONTROL_DMA /* cl507e.h */ 0x0000917e #define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e -#define TU104_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e +#define TU102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 5ca86e178bb9..3026b22d44fb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -36,5 +36,5 @@ int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); -int tu104_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c index ef97dd223a32..61638b3b9d3d 100644 --- a/drivers/gpu/drm/nouveau/nvif/disp.c +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -34,7 +34,7 @@ int nvif_disp_ctor(struct nvif_device *device, s32 oclass, struct nvif_disp *disp) { static const struct nvif_mclass disps[] = { - { TU104_DISP, -1 }, + { TU102_DISP, -1 }, { GV100_DISP, -1 }, { GP102_DISP, -1 }, { GP100_DISP, -1 }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 2f3d8da4f090..856f5a2c3a4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2461,7 +2461,7 @@ nv162_chipset = { .ce[2] = tu104_ce_new, .ce[3] = tu104_ce_new, .ce[4] = tu104_ce_new, - .disp = tu104_disp_new, + .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu104_fifo_new, }; @@ -2493,7 +2493,7 @@ nv164_chipset = { .ce[2] = tu104_ce_new, .ce[3] = tu104_ce_new, .ce[4] = tu104_ce_new, - .disp = tu104_disp_new, + .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu104_fifo_new, }; @@ -2525,7 +2525,7 @@ nv166_chipset = { .ce[2] = tu104_ce_new, .ce[3] = tu104_ce_new, .ce[4] = tu104_ce_new, - .disp = tu104_disp_new, + .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu104_fifo_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index c6a257ba4347..2c28a5e747cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -15,7 +15,7 @@ nvkm-y += nvkm/engine/disp/gm200.o nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o nvkm-y += nvkm/engine/disp/gv100.o -nvkm-y += nvkm/engine/disp/tu104.o +nvkm-y += nvkm/engine/disp/tu102.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o @@ -39,7 +39,7 @@ nvkm-y += nvkm/engine/disp/sorgk104.o nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/sorgv100.o -nvkm-y += nvkm/engine/disp/sortu104.o +nvkm-y += nvkm/engine/disp/sortu102.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -71,7 +71,7 @@ nvkm-y += nvkm/engine/disp/rootgm200.o nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o nvkm-y += nvkm/engine/disp/rootgv100.o -nvkm-y += nvkm/engine/disp/roottu104.o +nvkm-y += nvkm/engine/disp/roottu102.o nvkm-y += nvkm/engine/disp/channv50.o nvkm-y += nvkm/engine/disp/changf119.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 790e42f460fd..1681ddccd298 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -201,5 +201,5 @@ int gm200_sor_new(struct nvkm_disp *, int); int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); -int tu104_sor_new(struct nvkm_disp *, int); +int tu102_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 97de928cbde1..aee9822a7a87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -37,5 +37,5 @@ extern const struct nvkm_disp_oclass gm200_disp_root_oclass; extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; extern const struct nvkm_disp_oclass gv100_disp_root_oclass; -extern const struct nvkm_disp_oclass tu104_disp_root_oclass; +extern const struct nvkm_disp_oclass tu102_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c similarity index 74% rename from drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c index ad438c62f66c..579a5d02308a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c @@ -25,28 +25,28 @@ #include static const struct nv50_disp_root_func -tu104_disp_root = { +tu102_disp_root = { .user = { - {{0,0,TU104_DISP_CURSOR }, gv100_disp_curs_new }, - {{0,0,TU104_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, - {{0,0,TU104_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, - {{0,0,TU104_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {{0,0,TU102_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,TU102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,TU102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,TU102_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, {} }, }; static int -tu104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, +tu102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { - return nv50_disp_root_new_(&tu104_disp_root, disp, oclass, + return nv50_disp_root_new_(&tu102_disp_root, disp, oclass, data, size, pobject); } const struct nvkm_disp_oclass -tu104_disp_root_oclass = { - .base.oclass = TU104_DISP, +tu102_disp_root_oclass = { + .base.oclass = TU102_DISP, .base.minver = -1, .base.maxver = -1, - .ctor = tu104_disp_root_new, + .ctor = tu102_disp_root_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c similarity index 90% rename from drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index df026a525ef1..d57b73ada89e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -24,7 +24,7 @@ #include static void -tu104_sor_dp_vcpi(struct nvkm_ior *sor, int head, +tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head, u8 slot, u8 slot_nr, u16 pbn, u16 aligned) { struct nvkm_device *device = sor->disp->engine.subdev.device; @@ -35,7 +35,7 @@ tu104_sor_dp_vcpi(struct nvkm_ior *sor, int head, } static int -tu104_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) +tu102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) { struct nvkm_device *device = sor->disp->engine.subdev.device; const u32 soff = nv50_ior_base(sor); @@ -62,7 +62,7 @@ tu104_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) } static const struct nvkm_ior_func -tu104_sor = { +tu102_sor = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -75,11 +75,11 @@ tu104_sor = { }, .dp = { .lanes = { 0, 1, 2, 3 }, - .links = tu104_sor_dp_links, + .links = tu102_sor_dp_links, .power = g94_sor_dp_power, .pattern = gm107_sor_dp_pattern, .drive = gm200_sor_dp_drive, - .vcpi = tu104_sor_dp_vcpi, + .vcpi = tu102_sor_dp_vcpi, .audio = gv100_sor_dp_audio, .audio_sym = gv100_sor_dp_audio_sym, .watermark = gv100_sor_dp_watermark, @@ -91,7 +91,7 @@ tu104_sor = { }; int -tu104_sor_new(struct nvkm_disp *disp, int id) +tu102_sor_new(struct nvkm_disp *disp, int id) { - return nvkm_ior_new_(&tu104_sor, disp, SOR, id); + return nvkm_ior_new_(&tu102_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c similarity index 93% rename from drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index 13fa21459d38..883ae4151ff8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -29,7 +29,7 @@ #include static int -tu104_disp_init(struct nv50_disp *disp) +tu102_disp_init(struct nv50_disp *disp) { struct nvkm_device *device = disp->base.engine.subdev.device; struct nvkm_head *head; @@ -132,21 +132,21 @@ tu104_disp_init(struct nv50_disp *disp) } static const struct nv50_disp_func -tu104_disp = { - .init = tu104_disp_init, +tu102_disp = { + .init = tu102_disp_init, .fini = gv100_disp_fini, .intr = gv100_disp_intr, .uevent = &gv100_disp_chan_uevent, .super = gv100_disp_super, - .root = &tu104_disp_root_oclass, + .root = &tu102_disp_root_oclass, .wndw = { .cnt = gv100_disp_wndw_cnt }, .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, - .sor = { .cnt = gv100_sor_cnt, .new = tu104_sor_new }, + .sor = { .cnt = gv100_sor_cnt, .new = tu102_sor_new }, .ramht_size = 0x2000, }; int -tu104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +tu102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&tu104_disp, device, index, pdisp); + return nv50_disp_new_(&tu102_disp, device, index, pdisp); } From f10271ffdaba959594b40a1eb6bb321d657d7ee5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:11:47 +1000 Subject: [PATCH 08/60] drm/nouveau/fifo/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/fifo.h | 2 +- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 6 ++-- .../gpu/drm/nouveau/nvkm/engine/fifo/Kbuild | 6 ++-- .../drm/nouveau/nvkm/engine/fifo/changk104.h | 2 +- .../fifo/{gpfifotu104.c => gpfifotu102.c} | 10 +++---- .../nvkm/engine/fifo/{tu104.c => tu102.c} | 30 +++++++++---------- .../gpu/drm/nouveau/nvkm/engine/fifo/user.h | 2 +- .../engine/fifo/{usertu104.c => usertu102.c} | 10 +++---- 8 files changed, 34 insertions(+), 34 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/engine/fifo/{gpfifotu104.c => gpfifotu102.c} (91%) rename drivers/gpu/drm/nouveau/nvkm/engine/fifo/{tu104.c => tu102.c} (82%) rename drivers/gpu/drm/nouveau/nvkm/engine/fifo/{usertu104.c => usertu102.c} (86%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index 3b2b685778eb..b7fc04dd1628 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -74,5 +74,5 @@ int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp10b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gv100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); -int tu104_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); +int tu102_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 856f5a2c3a4b..11a6013a999e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2463,7 +2463,7 @@ nv162_chipset = { .ce[4] = tu104_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, - .fifo = tu104_fifo_new, + .fifo = tu102_fifo_new, }; static const struct nvkm_device_chip @@ -2495,7 +2495,7 @@ nv164_chipset = { .ce[4] = tu104_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, - .fifo = tu104_fifo_new, + .fifo = tu102_fifo_new, }; static const struct nvkm_device_chip @@ -2527,7 +2527,7 @@ nv166_chipset = { .ce[4] = tu104_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, - .fifo = tu104_fifo_new, + .fifo = tu102_fifo_new, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 87d8e054e40a..05aada541ea5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -16,7 +16,7 @@ nvkm-y += nvkm/engine/fifo/gm20b.o nvkm-y += nvkm/engine/fifo/gp100.o nvkm-y += nvkm/engine/fifo/gp10b.o nvkm-y += nvkm/engine/fifo/gv100.o -nvkm-y += nvkm/engine/fifo/tu104.o +nvkm-y += nvkm/engine/fifo/tu102.o nvkm-y += nvkm/engine/fifo/chan.o nvkm-y += nvkm/engine/fifo/channv50.o @@ -34,7 +34,7 @@ nvkm-y += nvkm/engine/fifo/gpfifog84.o nvkm-y += nvkm/engine/fifo/gpfifogf100.o nvkm-y += nvkm/engine/fifo/gpfifogk104.o nvkm-y += nvkm/engine/fifo/gpfifogv100.o -nvkm-y += nvkm/engine/fifo/gpfifotu104.o +nvkm-y += nvkm/engine/fifo/gpfifotu102.o nvkm-y += nvkm/engine/fifo/usergv100.o -nvkm-y += nvkm/engine/fifo/usertu104.o +nvkm-y += nvkm/engine/fifo/usertu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h index a14545d871d8..f8557cdfbd81 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -47,6 +47,6 @@ int gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *, int gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *, struct nvkm_engine *, bool); -int tu104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, +int tu102_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c similarity index 91% rename from drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c index ff70484dd01a..abef7fb6e2d3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c @@ -29,14 +29,14 @@ #include static u32 -tu104_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base) +tu102_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); return (chan->runl << 16) | chan->base.chid; } static const struct nvkm_fifo_chan_func -tu104_fifo_gpfifo = { +tu102_fifo_gpfifo = { .dtor = gk104_fifo_gpfifo_dtor, .init = gk104_fifo_gpfifo_init, .fini = gk104_fifo_gpfifo_fini, @@ -45,11 +45,11 @@ tu104_fifo_gpfifo = { .engine_dtor = gk104_fifo_gpfifo_engine_dtor, .engine_init = gv100_fifo_gpfifo_engine_init, .engine_fini = gv100_fifo_gpfifo_engine_fini, - .submit_token = tu104_fifo_gpfifo_submit_token, + .submit_token = tu102_fifo_gpfifo_submit_token, }; int -tu104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, +tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { struct nvkm_object *parent = oclass->parent; @@ -67,7 +67,7 @@ tu104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, args->v0.ilength, args->v0.runlist, args->v0.priv); if (args->v0.priv && !oclass->client->super) return -EINVAL; - return gv100_fifo_gpfifo_new_(&tu104_fifo_gpfifo, fifo, + return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo, &args->v0.runlist, &args->v0.chid, args->v0.vmm, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c similarity index 82% rename from drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c index 98c80705bc61..005f3e1729b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c @@ -29,7 +29,7 @@ #include static void -tu104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, +tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, struct nvkm_memory *mem, int nr) { struct nvkm_device *device = fifo->base.engine.subdev.device; @@ -44,15 +44,15 @@ tu104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, } const struct gk104_fifo_runlist_func -tu104_fifo_runlist = { +tu102_fifo_runlist = { .size = 16, .cgrp = gv100_fifo_runlist_cgrp, .chan = gv100_fifo_runlist_chan, - .commit = tu104_fifo_runlist_commit, + .commit = tu102_fifo_runlist_commit, }; static const struct nvkm_enum -tu104_fifo_fault_engine[] = { +tu102_fifo_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x03, "PTP" }, { 0x06, "PWR_PMU" }, @@ -80,7 +80,7 @@ tu104_fifo_fault_engine[] = { }; static void -tu104_fifo_pbdma_init(struct gk104_fifo *fifo) +tu102_fifo_pbdma_init(struct gk104_fifo *fifo) { struct nvkm_device *device = fifo->base.engine.subdev.device; const u32 mask = (1 << fifo->pbdma_nr) - 1; @@ -89,28 +89,28 @@ tu104_fifo_pbdma_init(struct gk104_fifo *fifo) } static const struct gk104_fifo_pbdma_func -tu104_fifo_pbdma = { +tu102_fifo_pbdma = { .nr = gm200_fifo_pbdma_nr, - .init = tu104_fifo_pbdma_init, + .init = tu102_fifo_pbdma_init, .init_timeout = gk208_fifo_pbdma_init_timeout, }; static const struct gk104_fifo_func -tu104_fifo = { - .pbdma = &tu104_fifo_pbdma, +tu102_fifo = { + .pbdma = &tu102_fifo_pbdma, .fault.access = gv100_fifo_fault_access, - .fault.engine = tu104_fifo_fault_engine, + .fault.engine = tu102_fifo_fault_engine, .fault.reason = gv100_fifo_fault_reason, .fault.hubclient = gv100_fifo_fault_hubclient, .fault.gpcclient = gv100_fifo_fault_gpcclient, - .runlist = &tu104_fifo_runlist, - .user = {{-1,-1,VOLTA_USERMODE_A }, tu104_fifo_user_new }, - .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu104_fifo_gpfifo_new }, + .runlist = &tu102_fifo_runlist, + .user = {{-1,-1,VOLTA_USERMODE_A }, tu102_fifo_user_new }, + .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new }, .cgrp_force = true, }; int -tu104_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) +tu102_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&tu104_fifo, device, index, 4096, pfifo); + return gk104_fifo_new_(&tu102_fifo, device, index, 4096, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h index 14b0c6bde8eb..54a3a3092cc0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h @@ -3,6 +3,6 @@ #include "priv.h" int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **); -int tu104_fifo_user_new(const struct nvkm_oclass *, void *, u32, +int tu102_fifo_user_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c similarity index 86% rename from drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c index 8f98548a21f6..217268f8ccad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c @@ -22,7 +22,7 @@ #include "user.h" static int -tu104_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, +tu102_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_device *device = object->engine->subdev.device; @@ -33,13 +33,13 @@ tu104_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, } static const struct nvkm_object_func -tu104_fifo_user = { - .map = tu104_fifo_user_map, +tu102_fifo_user = { + .map = tu102_fifo_user_map, }; int -tu104_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, +tu102_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) { - return nvkm_object_new_(&tu104_fifo_user, oclass, argv, argc, pobject); + return nvkm_object_new_(&tu102_fifo_user, oclass, argv, argc, pobject); } From b6c82854761f6fa0c1b1c28699bbac5c9840ec73 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 17 Jan 2019 12:13:00 +1000 Subject: [PATCH 09/60] drm/nouveau/ce/tu102: rename implementation from tu104 Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/engine/ce.h | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild | 2 +- .../nvkm/engine/ce/{tu104.c => tu102.c} | 6 ++-- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 30 +++++++++---------- 4 files changed, 20 insertions(+), 20 deletions(-) rename drivers/gpu/drm/nouveau/nvkm/engine/ce/{tu104.c => tu102.c} (91%) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h index 86abe76023c2..5f3650692e4d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h @@ -11,5 +11,5 @@ int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gv100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); -int tu104_ce_new(struct nvkm_device *, int, struct nvkm_engine **); +int tu102_ce_new(struct nvkm_device *, int, struct nvkm_engine **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild index 177a23301d6a..9211663239af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild @@ -6,4 +6,4 @@ nvkm-y += nvkm/engine/ce/gm200.o nvkm-y += nvkm/engine/ce/gp100.o nvkm-y += nvkm/engine/ce/gp102.o nvkm-y += nvkm/engine/ce/gv100.o -nvkm-y += nvkm/engine/ce/tu104.o +nvkm-y += nvkm/engine/ce/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c similarity index 91% rename from drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c rename to drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c index 3c25043bbb33..b4308e2d8c75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c @@ -24,7 +24,7 @@ #include static const struct nvkm_engine_func -tu104_ce = { +tu102_ce = { .intr = gp100_ce_intr, .sclass = { { -1, -1, TURING_DMA_COPY_A }, @@ -33,8 +33,8 @@ tu104_ce = { }; int -tu104_ce_new(struct nvkm_device *device, int index, +tu102_ce_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { - return nvkm_engine_new_(&tu104_ce, device, index, true, pengine); + return nvkm_engine_new_(&tu102_ce, device, index, true, pengine); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 11a6013a999e..e6b962f664d8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2456,11 +2456,11 @@ nv162_chipset = { .therm = gp100_therm_new, .timer = gk20a_timer_new, .top = gk104_top_new, - .ce[0] = tu104_ce_new, - .ce[1] = tu104_ce_new, - .ce[2] = tu104_ce_new, - .ce[3] = tu104_ce_new, - .ce[4] = tu104_ce_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, @@ -2488,11 +2488,11 @@ nv164_chipset = { .therm = gp100_therm_new, .timer = gk20a_timer_new, .top = gk104_top_new, - .ce[0] = tu104_ce_new, - .ce[1] = tu104_ce_new, - .ce[2] = tu104_ce_new, - .ce[3] = tu104_ce_new, - .ce[4] = tu104_ce_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, @@ -2520,11 +2520,11 @@ nv166_chipset = { .therm = gp100_therm_new, .timer = gk20a_timer_new, .top = gk104_top_new, - .ce[0] = tu104_ce_new, - .ce[1] = tu104_ce_new, - .ce[2] = tu104_ce_new, - .ce[3] = tu104_ce_new, - .ce[4] = tu104_ce_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, From d83d345338b10b560cf4532802be0973493075bb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 4 Sep 2018 16:23:33 +0100 Subject: [PATCH 10/60] drm/nouveau/bios/dp: make array vsoff static, shrinks object size Don't populate the array vsoff on the stack but instead make it static. Makes the object code smaller by 67 bytes: Before: text data bss dec hex filename 5753 112 0 5865 16e9 .../nouveau/nvkm/subdev/bios/dp.o After: text data bss dec hex filename 5622 176 0 5798 16a6 .../nouveau/nvkm/subdev/bios/dp.o (gcc version 8.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 3133b28f849c..b099d1209be8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -212,7 +212,7 @@ nvbios_dpcfg_match(struct nvkm_bios *bios, u16 outp, u8 pc, u8 vs, u8 pe, u16 data; if (*ver >= 0x30) { - const u8 vsoff[] = { 0, 4, 7, 9 }; + static const u8 vsoff[] = { 0, 4, 7, 9 }; idx = (pc * 10) + vsoff[vs] + pe; if (*ver >= 0x40 && *ver <= 0x41 && *hdr >= 0x12) idx += nvbios_rd08(bios, outp + 0x11) * 40; From 13649101a25c53c87f4ab98a076dfe61f3636ab1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 25 Nov 2018 17:09:18 +0000 Subject: [PATCH 11/60] drm/nouveau/bios/ramcfg: fix missing parentheses when calculating RON Currently, the expression for calculating RON is always going to result in zero no matter the value of ram->mr[1] because the ! operator has higher precedence than the shift >> operator. I believe the missing parentheses around the expression before appying the ! operator will result in the desired result. [ Note, not tested ] Detected by CoveritScan, CID#1324005 ("Operands don't affect result") Fixes: c25bf7b6155c ("drm/nouveau/bios/ramcfg: Separate out RON pull value") Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c index 60ece0a8a2e1..1d2d6bae73cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c @@ -87,7 +87,7 @@ nvkm_gddr3_calc(struct nvkm_ram *ram) WR = (ram->next->bios.timing[2] & 0x007f0000) >> 16; /* XXX: Get these values from the VBIOS instead */ DLL = !(ram->mr[1] & 0x1); - RON = !(ram->mr[1] & 0x300) >> 8; + RON = !((ram->mr[1] & 0x300) >> 8); break; default: return -ENOSYS; From b1d03fc36ec9834465a08c275c8d563e07f6f6bf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Dec 2018 15:29:49 +0000 Subject: [PATCH 12/60] drm/nouveau/pmu: don't print reply values if exec is false Currently the uninitialized values in the array reply are printed out when exec is false and nvkm_pmu_send has not updated the array. Avoid confusion by only dumping out these values if they have been actually updated. Detected by CoverityScan, CID#1271291 ("Uninitialized scaler variable") Fixes: ebb58dc2ef8c ("drm/nouveau/pmu: rename from pwr (no binary change)") Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c index 11b28b086a06..7b052879af72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c @@ -88,10 +88,10 @@ nvkm_memx_fini(struct nvkm_memx **pmemx, bool exec) if (exec) { nvkm_pmu_send(pmu, reply, PROC_MEMX, MEMX_MSG_EXEC, memx->base, finish); + nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", + reply[0], reply[1]); } - nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", - reply[0], reply[1]); kfree(memx); return 0; } From b513a18cf1d705bd04efd91c417e79e4938be093 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 28 Jan 2019 16:03:50 -0500 Subject: [PATCH 13/60] drm/nouveau: Don't WARN_ON VCPI allocation failures This is much louder then we want. VCPI allocation failures are quite normal, since they will happen if any part of the modesetting process is interrupted by removing the DP MST topology in question. So just print a debugging message on VCPI failures instead. Signed-off-by: Lyude Paul Fixes: f479c0ba4a17 ("drm/nouveau/kms/nv50: initial support for DP 1.2 multi-stream") Cc: Ben Skeggs Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: # v4.10+ Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index e8bb35f6d015..3f618ed4ec6f 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -817,7 +817,8 @@ nv50_msto_enable(struct drm_encoder *encoder) r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, armh->dp.pbn, armh->dp.tu); - WARN_ON(!r); + if (!r) + DRM_DEBUG_KMS("Failed to allocate VCPI\n"); if (!mstm->links++) nv50_outp_acquire(mstm->outp); From 966b2217d2b94ef44c55184f99a40d48f9ad593b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Jan 2019 14:30:46 -0600 Subject: [PATCH 14/60] drm/nouveau: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/gpu/drm/nouveau/nouveau_bo.c:1434:53: warning: this statement may fall through [-Wimplicit-fallthrough=] Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 73eff52036d2..a72be71c45b4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1434,7 +1434,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 || !mem->kind) /* untiled */ break; - /* fallthrough, tiled memory */ + /* fall through - tiled memory */ case TTM_PL_VRAM: reg->bus.offset = reg->start << PAGE_SHIFT; reg->bus.base = device->func->resource_addr(device, 1); From 785cf1eeafa23ec63f426d322401054d13abe2a3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 8 Oct 2018 21:47:36 +0100 Subject: [PATCH 15/60] drm/nouveau: fix missing break in switch statement The NOUVEAU_GETPARAM_PCI_DEVICE case is missing a break statement and falls through to the following NOUVEAU_GETPARAM_BUS_TYPE case and may end up re-assigning the getparam->value to an undesired value. Fix this by adding in the missing break. Detected by CoverityScan, CID#1460507 ("Missing break in switch") Fixes: 359088d5b8ec ("drm/nouveau: remove trivial cases of nvxx_device() usage") Signed-off-by: Colin Ian King Reviewed-by: Gustavo A. R. Silva Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index b06cdac8f3a2..a95ec3783f39 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -214,6 +214,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) WARN_ON(1); break; } + break; case NOUVEAU_GETPARAM_FB_SIZE: getparam->value = drm->gem.vram_available; break; From 78cdadb8405e8364d990e7b7183b3d404f4de9bf Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 16/60] drm/nouveau/core: define GSP subdev Exact meaning of the acronym is unknown, but we need this for Turing ACR. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 3 +++ drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 9 +++++++++ drivers/gpu/drm/nouveau/nvkm/core/subdev.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild | 0 7 files changed, 17 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 72e4dc1f0236..642492344196 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -28,6 +28,7 @@ enum nvkm_devidx { NVKM_SUBDEV_ICCSENSE, NVKM_SUBDEV_THERM, NVKM_SUBDEV_CLK, + NVKM_SUBDEV_GSP, NVKM_SUBDEV_SECBOOT, NVKM_ENGINE_BSP, @@ -137,6 +138,7 @@ struct nvkm_device { struct nvkm_fb *fb; struct nvkm_fuse *fuse; struct nvkm_gpio *gpio; + struct nvkm_gsp *gsp; struct nvkm_i2c *i2c; struct nvkm_subdev *ibus; struct nvkm_iccsense *iccsense; @@ -209,6 +211,7 @@ struct nvkm_device_chip { int (*fb )(struct nvkm_device *, int idx, struct nvkm_fb **); int (*fuse )(struct nvkm_device *, int idx, struct nvkm_fuse **); int (*gpio )(struct nvkm_device *, int idx, struct nvkm_gpio **); + int (*gsp )(struct nvkm_device *, int idx, struct nvkm_gsp **); int (*i2c )(struct nvkm_device *, int idx, struct nvkm_i2c **); int (*ibus )(struct nvkm_device *, int idx, struct nvkm_subdev **); int (*iccsense)(struct nvkm_device *, int idx, struct nvkm_iccsense **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h new file mode 100644 index 000000000000..00448990beef --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -0,0 +1,9 @@ +#ifndef __NVKM_GSP_H__ +#define __NVKM_GSP_H__ +#define nvkm_gsp(p) container_of((p), struct nvkm_gsp, subdev) +#include + +struct nvkm_gsp { + struct nvkm_subdev subdev; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c index c61b467cf45e..245990de1e90 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c @@ -39,6 +39,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = { [NVKM_SUBDEV_FB ] = "fb", [NVKM_SUBDEV_FUSE ] = "fuse", [NVKM_SUBDEV_GPIO ] = "gpio", + [NVKM_SUBDEV_GSP ] = "gsp", [NVKM_SUBDEV_I2C ] = "i2c", [NVKM_SUBDEV_IBUS ] = "priv", [NVKM_SUBDEV_ICCSENSE] = "iccsense", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e6b962f664d8..5e8cb1dc54dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2567,6 +2567,7 @@ nvkm_device_subdev(struct nvkm_device *device, int index) _(FB , device->fb , &device->fb->subdev); _(FUSE , device->fuse , &device->fuse->subdev); _(GPIO , device->gpio , &device->gpio->subdev); + _(GSP , device->gsp , &device->gsp->subdev); _(I2C , device->i2c , &device->i2c->subdev); _(IBUS , device->ibus , device->ibus); _(ICCSENSE, device->iccsense, &device->iccsense->subdev); @@ -3050,6 +3051,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, _(NVKM_SUBDEV_FB , fb); _(NVKM_SUBDEV_FUSE , fuse); _(NVKM_SUBDEV_GPIO , gpio); + _(NVKM_SUBDEV_GSP , gsp); _(NVKM_SUBDEV_I2C , i2c); _(NVKM_SUBDEV_IBUS , ibus); _(NVKM_SUBDEV_ICCSENSE, iccsense); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 253ab914a8ef..2a53e37dfa7a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild index cfdffef1afb9..a339fe03d423 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild @@ -7,6 +7,7 @@ include $(src)/nvkm/subdev/fault/Kbuild include $(src)/nvkm/subdev/fb/Kbuild include $(src)/nvkm/subdev/fuse/Kbuild include $(src)/nvkm/subdev/gpio/Kbuild +include $(src)/nvkm/subdev/gsp/Kbuild include $(src)/nvkm/subdev/i2c/Kbuild include $(src)/nvkm/subdev/ibus/Kbuild include $(src)/nvkm/subdev/iccsense/Kbuild diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild new file mode 100644 index 000000000000..e69de29bb2d1 From eec9ffe47f60db8e2ff96b49939d4c2b1be2c36f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 17/60] drm/nouveau/top: add function to lookup PRI address for devices Will be using this in upcoming changes to avoid the need for entirely new subdevs to deal with Turing register moves. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/top.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h index f7d3eb647e2e..2904e67d79d2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h @@ -9,6 +9,7 @@ struct nvkm_top { struct list_head device; }; +u32 nvkm_top_addr(struct nvkm_device *, enum nvkm_devidx); u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx); u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs); u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c index 67ada1d9a28c..cce6e4e90ebf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c @@ -40,6 +40,22 @@ nvkm_top_device_new(struct nvkm_top *top) return info; } +u32 +nvkm_top_addr(struct nvkm_device *device, enum nvkm_devidx index) +{ + struct nvkm_top *top = device->top; + struct nvkm_top_device *info; + + if (top) { + list_for_each_entry(info, &top->device, head) { + if (info->index == index) + return info->addr; + } + } + + return 0; +} + u32 nvkm_top_reset(struct nvkm_device *device, enum nvkm_devidx index) { From 7975dfc36a0be20875373617b92ec87a47d8c565 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 18/60] drm/nouveau/top/gv100-: translate entry for the GSP So we're able to connect fault/interrupt handling to the GSP subdev. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c index 39081eadfd84..e01746ce9fc4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c @@ -73,6 +73,7 @@ gk104_top_oneinit(struct nvkm_top *top) #define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A #define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1) \ info->index = NVKM_ENGINE_##A##0 + inst +#define C_(A) if (inst == 0) info->index = NVKM_SUBDEV_##A switch (type) { case 0x00000000: A_(GR ); break; case 0x00000001: A_(CE0 ); break; @@ -88,6 +89,7 @@ gk104_top_oneinit(struct nvkm_top *top) case 0x0000000f: A_(NVENC1); break; case 0x00000010: B_(NVDEC ); break; case 0x00000013: B_(CE ); break; + case 0x00000014: C_(GSP ); break; break; default: break; From 2944b19b5cb5109c67944086de49ef0b47e64d83 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 19/60] drm/nouveau/gsp/gv100-: instantiate GSP falcon We need this for Turing ACR, but it's present from Volta onwards. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 5 ++ .../gpu/drm/nouveau/nvkm/engine/device/base.c | 4 ++ drivers/gpu/drm/nouveau/nvkm/falcon/base.c | 3 + .../gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c | 62 +++++++++++++++++++ 5 files changed, 75 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index 00448990beef..4c672a5c4cd5 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -5,5 +5,10 @@ struct nvkm_gsp { struct nvkm_subdev subdev; + u32 addr; + + struct nvkm_falcon *falcon; }; + +int gv100_gsp_new(struct nvkm_device *, int, struct nvkm_gsp **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 5e8cb1dc54dd..b583586e9f56 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2405,6 +2405,7 @@ nv140_chipset = { .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, @@ -2445,6 +2446,7 @@ nv162_chipset = { .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, @@ -2477,6 +2479,7 @@ nv164_chipset = { .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, @@ -2509,6 +2512,7 @@ nv166_chipset = { .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 427340153640..366c87de6e72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -204,6 +204,9 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, debug_reg = 0x408; falcon->has_emem = true; break; + case NVKM_SUBDEV_GSP: + debug_reg = 0x0; /*XXX*/ + break; default: nvkm_warn(subdev, "unsupported falcon %s!\n", nvkm_subdev_name[subdev->index]); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild index e69de29bb2d1..26fc6feb807e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild @@ -0,0 +1 @@ +nvkm-y += nvkm/subdev/gsp/gv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c new file mode 100644 index 000000000000..dccfaf1162e2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c @@ -0,0 +1,62 @@ +/* + * Copyright 2019 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include + +static int +gv100_gsp_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_gsp *gsp = nvkm_gsp(subdev); + + gsp->addr = nvkm_top_addr(subdev->device, subdev->index); + if (!gsp->addr) + return -EINVAL; + + return nvkm_falcon_v1_new(subdev, "GSP", gsp->addr, &gsp->falcon); +} + +static void * +gv100_gsp_dtor(struct nvkm_subdev *subdev) +{ + struct nvkm_gsp *gsp = nvkm_gsp(subdev); + nvkm_falcon_del(&gsp->falcon); + return gsp; +} + +static const struct nvkm_subdev_func +gv100_gsp = { + .dtor = gv100_gsp_dtor, + .oneinit = gv100_gsp_oneinit, +}; + +int +gv100_gsp_new(struct nvkm_device *device, int index, struct nvkm_gsp **pgsp) +{ + struct nvkm_gsp *gsp; + + if (!(gsp = *pgsp = kzalloc(sizeof(*gsp), GFP_KERNEL))) + return -ENOMEM; + + nvkm_subdev_ctor(&gv100_gsp, device, index, &gsp->subdev); + return 0; +} From 04574273509cb03a345d91f3156f316d5c887fad Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 20/60] drm/nouveau/nvdec/gp102-: utilise engine PRI address from TOP Turing has its NVDEC instances in an alternate location. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h index fe716859d4a9..b72a4844c5f7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h @@ -6,6 +6,8 @@ struct nvkm_nvdec { struct nvkm_engine engine; + u32 addr; + struct nvkm_falcon *falcon; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c index 4807021fd990..4a63581bdd5e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c @@ -21,13 +21,21 @@ */ #include "priv.h" +#include #include static int nvkm_nvdec_oneinit(struct nvkm_engine *engine) { struct nvkm_nvdec *nvdec = nvkm_nvdec(engine); - return nvkm_falcon_v1_new(&nvdec->engine.subdev, "NVDEC", 0x84000, + struct nvkm_subdev *subdev = &nvdec->engine.subdev; + + nvdec->addr = nvkm_top_addr(subdev->device, subdev->index); + if (!nvdec->addr) + return -EINVAL; + + /*XXX: fix naming of this when adding support for multiple-NVDEC */ + return nvkm_falcon_v1_new(subdev, "NVDEC", nvdec->addr, &nvdec->falcon); } From 1a346934909b2ec90a19ce453c68eb2186580e0e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 21/60] drm/nouveau/nvdec/tu102-: instantiate NVDEC0 falcon Required to run VPR scrubber binary as part of secboot. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b583586e9f56..8e52f0ec21a0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2466,6 +2466,7 @@ nv162_chipset = { .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, }; static const struct nvkm_device_chip @@ -2499,6 +2500,7 @@ nv164_chipset = { .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, }; static const struct nvkm_device_chip @@ -2532,6 +2534,7 @@ nv166_chipset = { .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, }; static int From fdad518362505f0bf09b323ee6c46f18b81f3e62 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 22/60] drm/nouveau/sec2: utilise engine PRI address from TOP Turing has its SEC2 instance in an alternate location, and this avoids needing to duplicate the code here for it. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/sec2.h | 2 ++ .../gpu/drm/nouveau/nvkm/engine/sec2/base.c | 23 +++++++++++++------ .../gpu/drm/nouveau/nvkm/engine/sec2/gp102.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/sec2/priv.h | 3 +-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h index f7d89822b905..66684456daf7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h @@ -5,6 +5,8 @@ struct nvkm_sec2 { struct nvkm_engine engine; + u32 addr; + struct nvkm_falcon *falcon; struct nvkm_msgqueue *queue; struct work_struct work; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c index f865d2a3e184..1b49e5b6717f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c @@ -22,6 +22,7 @@ #include "priv.h" #include +#include #include static void * @@ -39,18 +40,18 @@ nvkm_sec2_intr(struct nvkm_engine *engine) struct nvkm_sec2 *sec2 = nvkm_sec2(engine); struct nvkm_subdev *subdev = &engine->subdev; struct nvkm_device *device = subdev->device; - u32 disp = nvkm_rd32(device, 0x8701c); - u32 intr = nvkm_rd32(device, 0x87008) & disp & ~(disp >> 16); + u32 disp = nvkm_rd32(device, sec2->addr + 0x01c); + u32 intr = nvkm_rd32(device, sec2->addr + 0x008) & disp & ~(disp >> 16); if (intr & 0x00000040) { schedule_work(&sec2->work); - nvkm_wr32(device, 0x87004, 0x00000040); + nvkm_wr32(device, sec2->addr + 0x004, 0x00000040); intr &= ~0x00000040; } if (intr) { nvkm_error(subdev, "unhandled intr %08x\n", intr); - nvkm_wr32(device, 0x87004, intr); + nvkm_wr32(device, sec2->addr + 0x004, intr); } } @@ -74,8 +75,15 @@ static int nvkm_sec2_oneinit(struct nvkm_engine *engine) { struct nvkm_sec2 *sec2 = nvkm_sec2(engine); - return nvkm_falcon_v1_new(&sec2->engine.subdev, "SEC2", 0x87000, - &sec2->falcon); + struct nvkm_subdev *subdev = &sec2->engine.subdev; + + if (!sec2->addr) { + sec2->addr = nvkm_top_addr(subdev->device, subdev->index); + if (WARN_ON(!sec2->addr)) + return -EINVAL; + } + + return nvkm_falcon_v1_new(subdev, "SEC2", sec2->addr, &sec2->falcon); } static int @@ -95,13 +103,14 @@ nvkm_sec2 = { }; int -nvkm_sec2_new_(struct nvkm_device *device, int index, +nvkm_sec2_new_(struct nvkm_device *device, int index, u32 addr, struct nvkm_sec2 **psec2) { struct nvkm_sec2 *sec2; if (!(sec2 = *psec2 = kzalloc(sizeof(*sec2), GFP_KERNEL))) return -ENOMEM; + sec2->addr = addr; INIT_WORK(&sec2->work, nvkm_sec2_recv); return nvkm_engine_ctor(&nvkm_sec2, device, index, true, &sec2->engine); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c index 9be1524c08f5..858cf27fa010 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c @@ -26,5 +26,5 @@ int gp102_sec2_new(struct nvkm_device *device, int index, struct nvkm_sec2 **psec2) { - return nvkm_sec2_new_(device, index, psec2); + return nvkm_sec2_new_(device, index, 0, psec2); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h index 2f97c806a79d..ab0165e2d1a3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h @@ -5,6 +5,5 @@ #define nvkm_sec2(p) container_of((p), struct nvkm_sec2, engine) -int nvkm_sec2_new_(struct nvkm_device *, int, struct nvkm_sec2 **); - +int nvkm_sec2_new_(struct nvkm_device *, int, u32 addr, struct nvkm_sec2 **); #endif From 8d2c1e337604fbffc3fc01475b97584a0cb85082 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 23/60] drm/nouveau/sec2/tu102-: instantiate SEC2 falcon Required for ACR. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/sec2.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 ++ .../gpu/drm/nouveau/nvkm/engine/sec2/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/engine/sec2/tu102.c | 33 +++++++++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h index 66684456daf7..c93ad332461a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h @@ -13,4 +13,5 @@ struct nvkm_sec2 { }; int gp102_sec2_new(struct nvkm_device *, int, struct nvkm_sec2 **); +int tu102_sec2_new(struct nvkm_device *, int, struct nvkm_sec2 **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 8e52f0ec21a0..7971096b6767 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2467,6 +2467,7 @@ nv162_chipset = { .dma = gv100_dma_new, .fifo = tu102_fifo_new, .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, }; static const struct nvkm_device_chip @@ -2501,6 +2502,7 @@ nv164_chipset = { .dma = gv100_dma_new, .fifo = tu102_fifo_new, .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, }; static const struct nvkm_device_chip @@ -2535,6 +2537,7 @@ nv166_chipset = { .dma = gv100_dma_new, .fifo = tu102_fifo_new, .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild index 4b17254cfbd0..d9cdea7d9353 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild @@ -1,2 +1,3 @@ nvkm-y += nvkm/engine/sec2/base.o nvkm-y += nvkm/engine/sec2/gp102.o +nvkm-y += nvkm/engine/sec2/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c new file mode 100644 index 000000000000..d655576164b1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c @@ -0,0 +1,33 @@ +/* + * Copyright 2019 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "priv.h" + +int +tu102_sec2_new(struct nvkm_device *device, int index, + struct nvkm_sec2 **psec2) +{ + /* TOP info wasn't updated on Turing to reflect the PRI + * address change for some reason. We override it here. + */ + return nvkm_sec2_new_(device, index, 0x840000, psec2); +} From c774ce66c5e8b59e1674684a56fbd420bb3f3ebc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 24/60] drm/nouveau/secboot: fix missing newline in error messages Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 5c14d6ac855d..1df09ed6fe6d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -853,7 +853,7 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) * and the expected behavior on RM as well */ if (ret && ret != 0x1d) { - nvkm_error(subdev, "HS unload failed, ret 0x%08x", ret); + nvkm_error(subdev, "HS unload failed, ret 0x%08x\n", ret); return -EINVAL; } nvkm_debug(subdev, "HS unload blob completed\n"); @@ -922,7 +922,7 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) if (ret < 0) { return ret; } else if (ret > 0) { - nvkm_error(subdev, "HS load failed, ret 0x%08x", ret); + nvkm_error(subdev, "HS load failed, ret 0x%08x\n", ret); return -EINVAL; } nvkm_debug(subdev, "HS load blob completed\n"); From 81f2bb5d65721acf1d98017623c0dd603f2bc767 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 25/60] drm/nouveau/bios/init: label existing INIT_GENERIC_CONDITION types Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index 9cc10e438b3d..5146359430df 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -806,12 +806,12 @@ init_generic_condition(struct nvbios_init *init) init->offset += 3; switch (cond) { - case 0: + case 0: /* CONDITION_ID_INT_DP. */ if (init_conn(init) != DCB_CONNECTOR_eDP) init_exec_set(init, false); break; - case 1: - case 2: + case 1: /* CONDITION_ID_USE_SPPLL0. */ + case 2: /* CONDITION_ID_USE_SPPLL1. */ if ( init->outp && (data = nvbios_dpout_match(bios, DCB_OUTPUT_DP, (init->outp->or << 0) | @@ -826,7 +826,7 @@ init_generic_condition(struct nvbios_init *init) if (init_exec(init)) warn("script needs dp output table data\n"); break; - case 5: + case 5: /* CONDITION_ID_ASSR_SUPPORT. */ if (!(init_rdauxr(init, 0x0d) & 1)) init_exec_set(init, false); break; From eb972d1474717a329ac6b682e9e9e9f17197ff10 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 26/60] drm/nouveau/bios/init: handle INIT_GENERIC_CONDITION_ID_NO_PANEL_SEQ_DELAYS As I currently understand it, this is related to features we have no support for as of yet. In theory, this change should be a noop, just without the warning. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index 5146359430df..ec0e9f7224b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -830,6 +830,9 @@ init_generic_condition(struct nvbios_init *init) if (!(init_rdauxr(init, 0x0d) & 1)) init_exec_set(init, false); break; + case 7: /* CONDITION_ID_NO_PANEL_SEQ_DELAYS. */ + init_exec_set(init, false); + break; default: warn("INIT_GENERIC_CONDITON: unknown 0x%02x\n", cond); init->offset += size; From a8ce8b65e108e49b02e876be410f391fc413fb61 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 27/60] drm/nouveau/disp/gf119-: decode exception reason to human-readable string We also change the error strings to match NVIDIA's naming. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c | 16 +++++++++++----- drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c | 7 +++++-- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 12 +++++++----- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h | 3 +++ 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index 794e90982641..e675d9b9d5d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -91,15 +91,21 @@ gf119_disp_intr_error(struct nv50_disp *disp, int chid) { struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - u32 mthd = nvkm_rd32(device, 0x6101f0 + (chid * 12)); + u32 stat = nvkm_rd32(device, 0x6101f0 + (chid * 12)); + u32 type = (stat & 0x00007000) >> 12; + u32 mthd = (stat & 0x00000ffc); u32 data = nvkm_rd32(device, 0x6101f4 + (chid * 12)); - u32 unkn = nvkm_rd32(device, 0x6101f8 + (chid * 12)); + u32 code = nvkm_rd32(device, 0x6101f8 + (chid * 12)); + const struct nvkm_enum *reason = + nvkm_enum_find(nv50_disp_intr_error_type, type); - nvkm_error(subdev, "chid %d mthd %04x data %08x %08x %08x\n", - chid, (mthd & 0x0000ffc), data, mthd, unkn); + nvkm_error(subdev, "chid %d stat %08x reason %d [%s] mthd %04x " + "data %08x code %08x\n", + chid, stat, type, reason ? reason->name : "", + mthd, data, code); if (chid < ARRAY_SIZE(disp->chan)) { - switch (mthd & 0xffc) { + switch (mthd) { case 0x0080: nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR); break; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c index 47be0ba4aebe..892be6c9b76c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -103,10 +103,13 @@ gv100_disp_exception(struct nv50_disp *disp, int chid) u32 mthd = (stat & 0x00000fff) << 2; u32 data = nvkm_rd32(device, 0x611024 + (chid * 12)); u32 code = nvkm_rd32(device, 0x611028 + (chid * 12)); + const struct nvkm_enum *reason = + nvkm_enum_find(nv50_disp_intr_error_type, type); - nvkm_error(subdev, "chid %d %08x [type %d mthd %04x] " + nvkm_error(subdev, "chid %d stat %08x reason %d [%s] mthd %04x " "data %08x code %08x\n", - chid, stat, type, mthd, data, code); + chid, stat, type, reason ? reason->name : "", + mthd, data, code); if (chid < ARRAY_SIZE(disp->chan) && disp->chan[chid]) { switch (mthd) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index def005dd5fda..e21556bf2cb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -28,7 +28,6 @@ #include "rootnv50.h" #include -#include #include #include #include @@ -593,12 +592,15 @@ nv50_disp_super(struct work_struct *work) nvkm_wr32(device, 0x610030, 0x80000000); } -static const struct nvkm_enum +const struct nvkm_enum nv50_disp_intr_error_type[] = { - { 3, "ILLEGAL_MTHD" }, - { 4, "INVALID_VALUE" }, + { 0, "NONE" }, + { 1, "PUSHBUFFER_ERR" }, + { 2, "TRAP" }, + { 3, "RESERVED_METHOD" }, + { 4, "INVALID_ARG" }, { 5, "INVALID_STATE" }, - { 7, "INVALID_HANDLE" }, + { 7, "UNRESOLVABLE_HANDLE" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index c36a8a7cafa1..e5d00f478bb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -5,6 +5,8 @@ #include "priv.h" struct nvkm_head; +#include + struct nv50_disp { const struct nv50_disp_func *func; struct nvkm_disp base; @@ -71,6 +73,7 @@ int nv50_disp_init(struct nv50_disp *); void nv50_disp_fini(struct nv50_disp *); void nv50_disp_intr(struct nv50_disp *); void nv50_disp_super(struct work_struct *); +extern const struct nvkm_enum nv50_disp_intr_error_type[]; int gf119_disp_init(struct nv50_disp *); void gf119_disp_fini(struct nv50_disp *); From d7f9bb656ea4032d7ac59fd01597ffd3994bc0d9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 28/60] drm/nouveau: allocate kernel channel(s) before initialising display Some of the pre-NV50 depends on SW methods to implement synchronisation for page flips, and we want to move this setup out of common code, thus we require the channel to have been allocation before display init. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index f900e94592f8..2c665f9aba93 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -504,6 +504,8 @@ nouveau_drm_device_init(struct drm_device *dev) if (ret) goto fail_bios; + nouveau_accel_init(drm); + ret = nouveau_display_create(dev); if (ret) goto fail_dispctor; @@ -516,7 +518,6 @@ nouveau_drm_device_init(struct drm_device *dev) nouveau_debugfs_init(drm); nouveau_hwmon_init(dev); - nouveau_accel_init(drm); nouveau_fbcon_init(dev); nouveau_led_init(dev); @@ -534,6 +535,7 @@ nouveau_drm_device_init(struct drm_device *dev) fail_dispinit: nouveau_display_destroy(dev); fail_dispctor: + nouveau_accel_fini(drm); nouveau_bios_takedown(dev); fail_bios: nouveau_ttm_fini(drm); @@ -559,7 +561,6 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_led_fini(dev); nouveau_fbcon_fini(dev); - nouveau_accel_fini(drm); nouveau_hwmon_fini(dev); nouveau_debugfs_fini(drm); @@ -567,6 +568,7 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_display_fini(dev, false, false); nouveau_display_destroy(dev); + nouveau_accel_fini(drm); nouveau_bios_takedown(dev); nouveau_ttm_fini(drm); From ba801ef068c1deed08531ff70e16c4847c338c73 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 29/60] drm/nouveau/kms: display destroy/init/fini hooks can be static Swapped order of functions in dispnv04 to allow this, but no code changes. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 110 ++++++++++++------------ drivers/gpu/drm/nouveau/dispnv04/disp.h | 3 - drivers/gpu/drm/nouveau/dispnv50/disp.c | 6 +- 3 files changed, 58 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 1727d399833c..f2abae39fdca 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -31,6 +31,61 @@ #include "nouveau_encoder.h" #include "nouveau_connector.h" +static void +nv04_display_fini(struct drm_device *dev) +{ + /* Disable vblank interrupts. */ + NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); + if (nv_two_heads(dev)) + NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); +} + +static int +nv04_display_init(struct drm_device *dev) +{ + struct nouveau_encoder *encoder; + struct nouveau_crtc *crtc; + + /* meh.. modeset apparently doesn't setup all the regs and depends + * on pre-existing state, for now load the state of the card *before* + * nouveau was loaded, and then do a modeset. + * + * best thing to do probably is to make save/restore routines not + * save/restore "pre-load" state, but more general so we can save + * on suspend too. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) + crtc->save(&crtc->base); + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_save(&encoder->base.base); + + return 0; +} + +static void +nv04_display_destroy(struct drm_device *dev) +{ + struct nv04_display *disp = nv04_display(dev); + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_encoder *encoder; + struct nouveau_crtc *nv_crtc; + + /* Restore state */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_restore(&encoder->base.base); + + list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head) + nv_crtc->restore(&nv_crtc->base); + + nouveau_hw_save_vga_fonts(dev, 0); + + nouveau_display(dev)->priv = NULL; + kfree(disp); + + nvif_object_unmap(&drm->client.device.object); +} + int nv04_display_create(struct drm_device *dev) { @@ -121,58 +176,3 @@ nv04_display_create(struct drm_device *dev) return 0; } - -void -nv04_display_destroy(struct drm_device *dev) -{ - struct nv04_display *disp = nv04_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_encoder *encoder; - struct nouveau_crtc *nv_crtc; - - /* Restore state */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) - encoder->enc_restore(&encoder->base.base); - - list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head) - nv_crtc->restore(&nv_crtc->base); - - nouveau_hw_save_vga_fonts(dev, 0); - - nouveau_display(dev)->priv = NULL; - kfree(disp); - - nvif_object_unmap(&drm->client.device.object); -} - -int -nv04_display_init(struct drm_device *dev) -{ - struct nouveau_encoder *encoder; - struct nouveau_crtc *crtc; - - /* meh.. modeset apparently doesn't setup all the regs and depends - * on pre-existing state, for now load the state of the card *before* - * nouveau was loaded, and then do a modeset. - * - * best thing to do probably is to make save/restore routines not - * save/restore "pre-load" state, but more general so we can save - * on suspend too. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) - crtc->save(&crtc->base); - - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) - encoder->enc_save(&encoder->base.base); - - return 0; -} - -void -nv04_display_fini(struct drm_device *dev) -{ - /* disable vblank interrupts */ - NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); - if (nv_two_heads(dev)) - NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); -} diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h index f74f1f2b186e..91bbc0c104eb 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h @@ -92,9 +92,6 @@ nv04_display(struct drm_device *dev) /* nv04_display.c */ int nv04_display_create(struct drm_device *); -void nv04_display_destroy(struct drm_device *); -int nv04_display_init(struct drm_device *); -void nv04_display_fini(struct drm_device *); /* nv04_crtc.c */ int nv04_crtc_create(struct drm_device *, int index); diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 3f618ed4ec6f..07aee824b364 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2221,7 +2221,7 @@ nv50_disp_func = { * Init *****************************************************************************/ -void +static void nv50_display_fini(struct drm_device *dev) { struct nouveau_encoder *nv_encoder; @@ -2243,7 +2243,7 @@ nv50_display_fini(struct drm_device *dev) } } -int +static int nv50_display_init(struct drm_device *dev) { struct nv50_core *core = nv50_disp(dev)->core; @@ -2270,7 +2270,7 @@ nv50_display_init(struct drm_device *dev) return 0; } -void +static void nv50_display_destroy(struct drm_device *dev) { struct nv50_disp *disp = nv50_disp(dev); From fcd6f048386da34d970bfeee25ac3c3fc28819ad Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 30/60] drm/nouveau/kms/nv04-nv4x: move a bunch of pre-nv50 page flip code to dispnv04 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv04/crtc.c | 214 ++++++++++++++++++++- drivers/gpu/drm/nouveau/dispnv04/disp.c | 19 ++ drivers/gpu/drm/nouveau/dispnv04/disp.h | 2 + drivers/gpu/drm/nouveau/nouveau_display.c | 215 +--------------------- drivers/gpu/drm/nouveau/nouveau_display.h | 15 -- drivers/gpu/drm/nouveau/nouveau_drm.c | 13 -- drivers/gpu/drm/nouveau/nouveau_drv.h | 1 - drivers/gpu/drm/nouveau/nouveau_fence.h | 2 - 8 files changed, 235 insertions(+), 246 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index 2c569e264df3..f22f01020625 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -40,6 +40,7 @@ #include "nvreg.h" #include "nouveau_fbcon.h" #include "disp.h" +#include "nouveau_dma.h" #include #include @@ -1077,12 +1078,223 @@ nouveau_crtc_set_config(struct drm_mode_set *set, return ret; } +struct nv04_page_flip_state { + struct list_head head; + struct drm_pending_vblank_event *event; + struct drm_crtc *crtc; + int bpp, pitch; + u64 offset; +}; + +static int +nv04_finish_page_flip(struct nouveau_channel *chan, + struct nv04_page_flip_state *ps) +{ + struct nouveau_fence_chan *fctx = chan->fence; + struct nouveau_drm *drm = chan->drm; + struct drm_device *dev = drm->dev; + struct nv04_page_flip_state *s; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + + if (list_empty(&fctx->flip)) { + NV_ERROR(drm, "unexpected pageflip\n"); + spin_unlock_irqrestore(&dev->event_lock, flags); + return -EINVAL; + } + + s = list_first_entry(&fctx->flip, struct nv04_page_flip_state, head); + if (s->event) { + drm_crtc_arm_vblank_event(s->crtc, s->event); + } else { + /* Give up ownership of vblank for page-flipped crtc */ + drm_crtc_vblank_put(s->crtc); + } + + list_del(&s->head); + if (ps) + *ps = *s; + kfree(s); + + spin_unlock_irqrestore(&dev->event_lock, flags); + return 0; +} + +int +nv04_flip_complete(struct nvif_notify *notify) +{ + struct nouveau_cli *cli = (void *)notify->object->client; + struct nouveau_drm *drm = cli->drm; + struct nouveau_channel *chan = drm->channel; + struct nv04_page_flip_state state; + + if (!nv04_finish_page_flip(chan, &state)) { + nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc), + state.offset + state.crtc->y * + state.pitch + state.crtc->x * + state.bpp / 8); + } + + return NVIF_NOTIFY_KEEP; +} + +static int +nv04_page_flip_emit(struct nouveau_channel *chan, + struct nouveau_bo *old_bo, + struct nouveau_bo *new_bo, + struct nv04_page_flip_state *s, + struct nouveau_fence **pfence) +{ + struct nouveau_fence_chan *fctx = chan->fence; + struct nouveau_drm *drm = chan->drm; + struct drm_device *dev = drm->dev; + unsigned long flags; + int ret; + + /* Queue it to the pending list */ + spin_lock_irqsave(&dev->event_lock, flags); + list_add_tail(&s->head, &fctx->flip); + spin_unlock_irqrestore(&dev->event_lock, flags); + + /* Synchronize with the old framebuffer */ + ret = nouveau_fence_sync(old_bo, chan, false, false); + if (ret) + goto fail; + + /* Emit the pageflip */ + ret = RING_SPACE(chan, 2); + if (ret) + goto fail; + + BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1); + OUT_RING (chan, 0x00000000); + FIRE_RING (chan); + + ret = nouveau_fence_new(chan, false, pfence); + if (ret) + goto fail; + + return 0; +fail: + spin_lock_irqsave(&dev->event_lock, flags); + list_del(&s->head); + spin_unlock_irqrestore(&dev->event_lock, flags); + return ret; +} + +static int +nv04_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, u32 flags, + struct drm_modeset_acquire_ctx *ctx) +{ + const int swap_interval = (flags & DRM_MODE_PAGE_FLIP_ASYNC) ? 0 : 1; + struct drm_device *dev = crtc->dev; + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->primary->fb)->nvbo; + struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo; + struct nv04_page_flip_state *s; + struct nouveau_channel *chan; + struct nouveau_cli *cli; + struct nouveau_fence *fence; + struct nv04_display *dispnv04 = nv04_display(dev); + int head = nouveau_crtc(crtc)->index; + int ret; + + chan = drm->channel; + if (!chan) + return -ENODEV; + cli = (void *)chan->user.client; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + if (new_bo != old_bo) { + ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true); + if (ret) + goto fail_free; + } + + mutex_lock(&cli->mutex); + ret = ttm_bo_reserve(&new_bo->bo, true, false, NULL); + if (ret) + goto fail_unpin; + + /* synchronise rendering channel with the kernel's channel */ + ret = nouveau_fence_sync(new_bo, chan, false, true); + if (ret) { + ttm_bo_unreserve(&new_bo->bo); + goto fail_unpin; + } + + if (new_bo != old_bo) { + ttm_bo_unreserve(&new_bo->bo); + + ret = ttm_bo_reserve(&old_bo->bo, true, false, NULL); + if (ret) + goto fail_unpin; + } + + /* Initialize a page flip struct */ + *s = (struct nv04_page_flip_state) + { { }, event, crtc, fb->format->cpp[0] * 8, fb->pitches[0], + new_bo->bo.offset }; + + /* Keep vblanks on during flip, for the target crtc of this flip */ + drm_crtc_vblank_get(crtc); + + /* Emit a page flip */ + if (swap_interval) { + ret = RING_SPACE(chan, 8); + if (ret) + goto fail_unreserve; + + BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1); + OUT_RING (chan, 0); + BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1); + OUT_RING (chan, head); + BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1); + OUT_RING (chan, 0); + BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1); + OUT_RING (chan, 0); + } + + nouveau_bo_ref(new_bo, &dispnv04->image[head]); + + ret = nv04_page_flip_emit(chan, old_bo, new_bo, s, &fence); + if (ret) + goto fail_unreserve; + mutex_unlock(&cli->mutex); + + /* Update the crtc struct and cleanup */ + crtc->primary->fb = fb; + + nouveau_bo_fence(old_bo, fence, false); + ttm_bo_unreserve(&old_bo->bo); + if (old_bo != new_bo) + nouveau_bo_unpin(old_bo); + nouveau_fence_unref(&fence); + return 0; + +fail_unreserve: + drm_crtc_vblank_put(crtc); + ttm_bo_unreserve(&old_bo->bo); +fail_unpin: + mutex_unlock(&cli->mutex); + if (old_bo != new_bo) + nouveau_bo_unpin(new_bo); +fail_free: + kfree(s); + return ret; +} + static const struct drm_crtc_funcs nv04_crtc_funcs = { .cursor_set = nv04_crtc_cursor_set, .cursor_move = nv04_crtc_cursor_move, .gamma_set = nv_crtc_gamma_set, .set_config = nouveau_crtc_set_config, - .page_flip = nouveau_crtc_page_flip, + .page_flip = nv04_crtc_page_flip, .destroy = nv_crtc_destroy, }; diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index f2abae39fdca..bda02f660304 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -31,9 +31,16 @@ #include "nouveau_encoder.h" #include "nouveau_connector.h" +#include + static void nv04_display_fini(struct drm_device *dev) { + struct nv04_display *disp = nv04_display(dev); + + /* Disable flip completion events. */ + nvif_notify_put(&disp->flip); + /* Disable vblank interrupts. */ NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); if (nv_two_heads(dev)) @@ -43,6 +50,7 @@ nv04_display_fini(struct drm_device *dev) static int nv04_display_init(struct drm_device *dev) { + struct nv04_display *disp = nv04_display(dev); struct nouveau_encoder *encoder; struct nouveau_crtc *crtc; @@ -60,6 +68,8 @@ nv04_display_init(struct drm_device *dev) list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) encoder->enc_save(&encoder->base.base); + /* Enable flip completion events. */ + nvif_notify_get(&disp->flip); return 0; } @@ -80,6 +90,8 @@ nv04_display_destroy(struct drm_device *dev) nouveau_hw_save_vga_fonts(dev, 0); + nvif_notify_fini(&disp->flip); + nouveau_display(dev)->priv = NULL; kfree(disp); @@ -113,6 +125,13 @@ nv04_display_create(struct drm_device *dev) /* Pre-nv50 doesn't support atomic, so don't expose the ioctls */ dev->driver->driver_features &= ~DRIVER_ATOMIC; + /* Request page flip completion event. */ + if (drm->nvsw.client) { + nvif_notify_init(&drm->nvsw, nv04_flip_complete, + false, NV04_NVSW_NTFY_UEVENT, + NULL, 0, 0, &disp->flip); + } + nouveau_hw_save_vga_fonts(dev, 1); nv04_crtc_create(dev, 0); diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h index 91bbc0c104eb..c6ed20a09f4a 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h @@ -82,6 +82,7 @@ struct nv04_display { uint32_t saved_vga_font[4][16384]; uint32_t dac_users[4]; struct nouveau_bo *image[2]; + struct nvif_notify flip; }; static inline struct nv04_display * @@ -173,4 +174,5 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table, ); } +int nv04_flip_complete(struct nvif_notify *); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 56b6ac1b8edd..1acf035db02e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -32,18 +32,13 @@ #include #include -#include - #include "nouveau_fbcon.h" -#include "dispnv04/hw.h" #include "nouveau_crtc.h" -#include "nouveau_dma.h" #include "nouveau_gem.h" #include "nouveau_connector.h" #include "nv50_display.h" -#include "nouveau_fence.h" - +#include #include #include @@ -415,7 +410,6 @@ int nouveau_display_init(struct drm_device *dev) { struct nouveau_display *disp = nouveau_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int ret; @@ -437,8 +431,6 @@ nouveau_display_init(struct drm_device *dev) } drm_connector_list_iter_end(&conn_iter); - /* enable flip completion events */ - nvif_notify_get(&drm->flip); return ret; } @@ -457,9 +449,6 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) drm_helper_force_disable_all(dev); } - /* disable flip completion events */ - nvif_notify_put(&drm->flip); - /* disable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { @@ -738,208 +727,6 @@ nouveau_display_resume(struct drm_device *dev, bool runtime) } } -static int -nouveau_page_flip_emit(struct nouveau_channel *chan, - struct nouveau_bo *old_bo, - struct nouveau_bo *new_bo, - struct nouveau_page_flip_state *s, - struct nouveau_fence **pfence) -{ - struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_drm *drm = chan->drm; - struct drm_device *dev = drm->dev; - unsigned long flags; - int ret; - - /* Queue it to the pending list */ - spin_lock_irqsave(&dev->event_lock, flags); - list_add_tail(&s->head, &fctx->flip); - spin_unlock_irqrestore(&dev->event_lock, flags); - - /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false, false); - if (ret) - goto fail; - - /* Emit the pageflip */ - ret = RING_SPACE(chan, 2); - if (ret) - goto fail; - - BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1); - OUT_RING (chan, 0x00000000); - FIRE_RING (chan); - - ret = nouveau_fence_new(chan, false, pfence); - if (ret) - goto fail; - - return 0; -fail: - spin_lock_irqsave(&dev->event_lock, flags); - list_del(&s->head); - spin_unlock_irqrestore(&dev->event_lock, flags); - return ret; -} - -int -nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, u32 flags, - struct drm_modeset_acquire_ctx *ctx) -{ - const int swap_interval = (flags & DRM_MODE_PAGE_FLIP_ASYNC) ? 0 : 1; - struct drm_device *dev = crtc->dev; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->primary->fb)->nvbo; - struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo; - struct nouveau_page_flip_state *s; - struct nouveau_channel *chan; - struct nouveau_cli *cli; - struct nouveau_fence *fence; - struct nv04_display *dispnv04 = nv04_display(dev); - int head = nouveau_crtc(crtc)->index; - int ret; - - chan = drm->channel; - if (!chan) - return -ENODEV; - cli = (void *)chan->user.client; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - if (new_bo != old_bo) { - ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true); - if (ret) - goto fail_free; - } - - mutex_lock(&cli->mutex); - ret = ttm_bo_reserve(&new_bo->bo, true, false, NULL); - if (ret) - goto fail_unpin; - - /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false, true); - if (ret) { - ttm_bo_unreserve(&new_bo->bo); - goto fail_unpin; - } - - if (new_bo != old_bo) { - ttm_bo_unreserve(&new_bo->bo); - - ret = ttm_bo_reserve(&old_bo->bo, true, false, NULL); - if (ret) - goto fail_unpin; - } - - /* Initialize a page flip struct */ - *s = (struct nouveau_page_flip_state) - { { }, event, crtc, fb->format->cpp[0] * 8, fb->pitches[0], - new_bo->bo.offset }; - - /* Keep vblanks on during flip, for the target crtc of this flip */ - drm_crtc_vblank_get(crtc); - - /* Emit a page flip */ - if (swap_interval) { - ret = RING_SPACE(chan, 8); - if (ret) - goto fail_unreserve; - - BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1); - OUT_RING (chan, 0); - BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1); - OUT_RING (chan, head); - BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1); - OUT_RING (chan, 0); - BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1); - OUT_RING (chan, 0); - } - - nouveau_bo_ref(new_bo, &dispnv04->image[head]); - - ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); - if (ret) - goto fail_unreserve; - mutex_unlock(&cli->mutex); - - /* Update the crtc struct and cleanup */ - crtc->primary->fb = fb; - - nouveau_bo_fence(old_bo, fence, false); - ttm_bo_unreserve(&old_bo->bo); - if (old_bo != new_bo) - nouveau_bo_unpin(old_bo); - nouveau_fence_unref(&fence); - return 0; - -fail_unreserve: - drm_crtc_vblank_put(crtc); - ttm_bo_unreserve(&old_bo->bo); -fail_unpin: - mutex_unlock(&cli->mutex); - if (old_bo != new_bo) - nouveau_bo_unpin(new_bo); -fail_free: - kfree(s); - return ret; -} - -int -nouveau_finish_page_flip(struct nouveau_channel *chan, - struct nouveau_page_flip_state *ps) -{ - struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_drm *drm = chan->drm; - struct drm_device *dev = drm->dev; - struct nouveau_page_flip_state *s; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); - - if (list_empty(&fctx->flip)) { - NV_ERROR(drm, "unexpected pageflip\n"); - spin_unlock_irqrestore(&dev->event_lock, flags); - return -EINVAL; - } - - s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head); - if (s->event) { - drm_crtc_arm_vblank_event(s->crtc, s->event); - } else { - /* Give up ownership of vblank for page-flipped crtc */ - drm_crtc_vblank_put(s->crtc); - } - - list_del(&s->head); - if (ps) - *ps = *s; - kfree(s); - - spin_unlock_irqrestore(&dev->event_lock, flags); - return 0; -} - -int -nouveau_flip_complete(struct nvif_notify *notify) -{ - struct nouveau_drm *drm = container_of(notify, typeof(*drm), flip); - struct nouveau_channel *chan = drm->channel; - struct nouveau_page_flip_state state; - - if (!nouveau_finish_page_flip(chan, &state)) { - nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc), - state.offset + state.crtc->y * - state.pitch + state.crtc->x * - state.bpp / 8); - } - - return NVIF_NOTIFY_KEEP; -} - int nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index eb77e41c2d4e..2676876a82c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -25,14 +25,6 @@ int nouveau_framebuffer_new(struct drm_device *, const struct drm_mode_fb_cmd2 *, struct nouveau_bo *, struct nouveau_framebuffer **); -struct nouveau_page_flip_state { - struct list_head head; - struct drm_pending_vblank_event *event; - struct drm_crtc *crtc; - int bpp, pitch; - u64 offset; -}; - struct nouveau_display { void *priv; void (*dtor)(struct drm_device *); @@ -71,13 +63,6 @@ bool nouveau_display_scanoutpos(struct drm_device *, unsigned int, bool, int *, int *, ktime_t *, ktime_t *, const struct drm_display_mode *); -int nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - struct drm_modeset_acquire_ctx *ctx); -int nouveau_finish_page_flip(struct nouveau_channel *, - struct nouveau_page_flip_state *); - int nouveau_display_dumb_create(struct drm_file *, struct drm_device *, struct drm_mode_create_dumb *args); int nouveau_display_dumb_map_offset(struct drm_file *, struct drm_device *, diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 2c665f9aba93..4ee530c941b1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -44,7 +44,6 @@ #include #include #include -#include #include "nouveau_drv.h" #include "nouveau_dma.h" @@ -288,7 +287,6 @@ nouveau_accel_fini(struct nouveau_drm *drm) nouveau_channel_idle(drm->channel); nvif_object_fini(&drm->ntfy); nvkm_gpuobj_del(&drm->notify); - nvif_notify_fini(&drm->flip); nvif_object_fini(&drm->nvsw); nouveau_channel_del(&drm->channel); @@ -412,17 +410,6 @@ nouveau_accel_init(struct nouveau_drm *drm) BEGIN_NV04(drm->channel, NvSubSw, 0, 1); OUT_RING (drm->channel, drm->nvsw.handle); } - - ret = nvif_notify_init(&drm->nvsw, - nouveau_flip_complete, - false, NV04_NVSW_NTFY_UEVENT, - NULL, 0, 0, &drm->flip); - if (ret == 0) - ret = nvif_notify_get(&drm->flip); - if (ret) { - nouveau_accel_fini(drm); - return; - } } if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index d20b9ba4b1c1..0ea79c047d8c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -181,7 +181,6 @@ struct nouveau_drm { struct nouveau_fbdev *fbcon; struct nvif_object nvsw; struct nvif_object ntfy; - struct nvif_notify flip; /* nv10-nv40 tiling regions */ struct { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index b999e6058046..ad27caeca0fd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -82,8 +82,6 @@ int nv50_fence_create(struct nouveau_drm *); int nv84_fence_create(struct nouveau_drm *); int nvc0_fence_create(struct nouveau_drm *); -int nouveau_flip_complete(struct nvif_notify *); - struct nv84_fence_chan { struct nouveau_fence_chan base; struct nouveau_vma *vma; From f04a4186afb6799c44a486c12308d9469a2fa8f2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 31/60] drm/nouveau/kms/nv04-nv4x: move suspend code to dispnv04 fini hook It has no relevance to the atomic path used by newer GPUs. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 27 ++++++++++++++++++++++- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 26 +--------------------- drivers/gpu/drm/nouveau/nouveau_display.h | 2 +- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index bda02f660304..451e38a82fe7 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -30,13 +30,15 @@ #include "hw.h" #include "nouveau_encoder.h" #include "nouveau_connector.h" +#include "nouveau_bo.h" #include static void -nv04_display_fini(struct drm_device *dev) +nv04_display_fini(struct drm_device *dev, bool suspend) { struct nv04_display *disp = nv04_display(dev); + struct drm_crtc *crtc; /* Disable flip completion events. */ nvif_notify_put(&disp->flip); @@ -45,6 +47,29 @@ nv04_display_fini(struct drm_device *dev) NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); if (nv_two_heads(dev)) NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); + + if (!suspend) + return; + + /* Un-pin FB and cursors so they'll be evicted to system memory. */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_framebuffer *nouveau_fb; + + nouveau_fb = nouveau_framebuffer(crtc->primary->fb); + if (!nouveau_fb || !nouveau_fb->nvbo) + continue; + + nouveau_bo_unpin(nouveau_fb->nvbo); + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + if (nv_crtc->cursor.nvbo) { + if (nv_crtc->cursor.set_offset) + nouveau_bo_unmap(nv_crtc->cursor.nvbo); + nouveau_bo_unpin(nv_crtc->cursor.nvbo); + } + } } static int diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 07aee824b364..b8a04f178a67 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2222,7 +2222,7 @@ nv50_disp_func = { *****************************************************************************/ static void -nv50_display_fini(struct drm_device *dev) +nv50_display_fini(struct drm_device *dev, bool suspend) { struct nouveau_encoder *nv_encoder; struct drm_encoder *encoder; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 1acf035db02e..2e717224d1a3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -461,7 +461,7 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) cancel_work_sync(&drm->hpd_work); drm_kms_helper_poll_disable(dev); - disp->fini(dev); + disp->fini(dev, suspend); } static void @@ -614,7 +614,6 @@ int nouveau_display_suspend(struct drm_device *dev, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - struct drm_crtc *crtc; if (drm_drv_uses_atomic_modeset(dev)) { if (!runtime) { @@ -625,32 +624,9 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime) return ret; } } - - nouveau_display_fini(dev, true, runtime); - return 0; } nouveau_display_fini(dev, true, runtime); - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_framebuffer *nouveau_fb; - - nouveau_fb = nouveau_framebuffer(crtc->primary->fb); - if (!nouveau_fb || !nouveau_fb->nvbo) - continue; - - nouveau_bo_unpin(nouveau_fb->nvbo); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - if (nv_crtc->cursor.nvbo) { - if (nv_crtc->cursor.set_offset) - nouveau_bo_unmap(nv_crtc->cursor.nvbo); - nouveau_bo_unpin(nv_crtc->cursor.nvbo); - } - } - return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 2676876a82c7..33ed788f5855 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -29,7 +29,7 @@ struct nouveau_display { void *priv; void (*dtor)(struct drm_device *); int (*init)(struct drm_device *); - void (*fini)(struct drm_device *); + void (*fini)(struct drm_device *, bool suspend); struct nvif_disp disp; From 0f9976dd97caac3de5308945eb5b5e1c7754b768 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 32/60] drm/nouveau/kms/nv04-nv4x: move resume code to dispnv04 init hook It has no relevance to the atomic path used by newer GPUs. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 70 +++++++++++++++++++++-- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 65 ++------------------- drivers/gpu/drm/nouveau/nouveau_display.h | 4 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 5 files changed, 74 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 451e38a82fe7..5713bacaee80 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -73,11 +73,13 @@ nv04_display_fini(struct drm_device *dev, bool suspend) } static int -nv04_display_init(struct drm_device *dev) +nv04_display_init(struct drm_device *dev, bool resume, bool runtime) { struct nv04_display *disp = nv04_display(dev); + struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_encoder *encoder; - struct nouveau_crtc *crtc; + struct drm_crtc *crtc; + int ret; /* meh.. modeset apparently doesn't setup all the regs and depends * on pre-existing state, for now load the state of the card *before* @@ -87,14 +89,74 @@ nv04_display_init(struct drm_device *dev) * save/restore "pre-load" state, but more general so we can save * on suspend too. */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) - crtc->save(&crtc->base); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + nv_crtc->save(&nv_crtc->base); + } list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) encoder->enc_save(&encoder->base.base); /* Enable flip completion events. */ nvif_notify_get(&disp->flip); + + if (!resume) + return 0; + + /* Re-pin FB/cursors. */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_framebuffer *nouveau_fb; + + nouveau_fb = nouveau_framebuffer(crtc->primary->fb); + if (!nouveau_fb || !nouveau_fb->nvbo) + continue; + + ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM, true); + if (ret) + NV_ERROR(drm, "Could not pin framebuffer\n"); + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + if (!nv_crtc->cursor.nvbo) + continue; + + ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, true); + if (!ret && nv_crtc->cursor.set_offset) + ret = nouveau_bo_map(nv_crtc->cursor.nvbo); + if (ret) + NV_ERROR(drm, "Could not pin/map cursor.\n"); + } + + /* Force CLUT to get re-loaded during modeset. */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + + nv_crtc->lut.depth = 0; + } + + /* This should ensure we don't hit a locking problem when someone + * wakes us up via a connector. We should never go into suspend + * while the display is on anyways. + */ + if (runtime) + return 0; + + /* Restore mode. */ + drm_helper_resume_force_mode(dev); + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + + if (!nv_crtc->cursor.nvbo) + continue; + + if (nv_crtc->cursor.set_offset) + nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset); + nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x, + nv_crtc->cursor_saved_y); + } + return 0; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index b8a04f178a67..4b1650f51955 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2244,7 +2244,7 @@ nv50_display_fini(struct drm_device *dev, bool suspend) } static int -nv50_display_init(struct drm_device *dev) +nv50_display_init(struct drm_device *dev, bool resume, bool runtime) { struct nv50_core *core = nv50_disp(dev)->core; struct drm_encoder *encoder; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 2e717224d1a3..55c0fa451163 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -407,14 +407,14 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val, #endif int -nouveau_display_init(struct drm_device *dev) +nouveau_display_init(struct drm_device *dev, bool resume, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int ret; - ret = disp->init(dev); + ret = disp->init(dev, resume, runtime); if (ret) return ret; @@ -634,73 +634,16 @@ void nouveau_display_resume(struct drm_device *dev, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); - struct drm_crtc *crtc; - int ret; + + nouveau_display_init(dev, true, runtime); if (drm_drv_uses_atomic_modeset(dev)) { - nouveau_display_init(dev); if (disp->suspend) { drm_atomic_helper_resume(dev, disp->suspend); disp->suspend = NULL; } return; } - - /* re-pin fb/cursors */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_framebuffer *nouveau_fb; - - nouveau_fb = nouveau_framebuffer(crtc->primary->fb); - if (!nouveau_fb || !nouveau_fb->nvbo) - continue; - - ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM, true); - if (ret) - NV_ERROR(drm, "Could not pin framebuffer\n"); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - if (!nv_crtc->cursor.nvbo) - continue; - - ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, true); - if (!ret && nv_crtc->cursor.set_offset) - ret = nouveau_bo_map(nv_crtc->cursor.nvbo); - if (ret) - NV_ERROR(drm, "Could not pin/map cursor.\n"); - } - - nouveau_display_init(dev); - - /* Force CLUT to get re-loaded during modeset */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - - nv_crtc->lut.depth = 0; - } - - /* This should ensure we don't hit a locking problem when someone - * wakes us up via a connector. We should never go into suspend - * while the display is on anyways. - */ - if (runtime) - return; - - drm_helper_resume_force_mode(dev); - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - - if (!nv_crtc->cursor.nvbo) - continue; - - if (nv_crtc->cursor.set_offset) - nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset); - nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x, - nv_crtc->cursor_saved_y); - } } int diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 33ed788f5855..311e175f0513 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -28,7 +28,7 @@ int nouveau_framebuffer_new(struct drm_device *, struct nouveau_display { void *priv; void (*dtor)(struct drm_device *); - int (*init)(struct drm_device *); + int (*init)(struct drm_device *, bool resume, bool runtime); void (*fini)(struct drm_device *, bool suspend); struct nvif_disp disp; @@ -53,7 +53,7 @@ nouveau_display(struct drm_device *dev) int nouveau_display_create(struct drm_device *dev); void nouveau_display_destroy(struct drm_device *dev); -int nouveau_display_init(struct drm_device *dev); +int nouveau_display_init(struct drm_device *dev, bool resume, bool runtime); void nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime); int nouveau_display_suspend(struct drm_device *dev, bool runtime); void nouveau_display_resume(struct drm_device *dev, bool runtime); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 4ee530c941b1..50025e1ff857 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -498,7 +498,7 @@ nouveau_drm_device_init(struct drm_device *dev) goto fail_dispctor; if (dev->mode_config.num_crtc) { - ret = nouveau_display_init(dev); + ret = nouveau_display_init(dev, false, false); if (ret) goto fail_dispinit; } From f0eee9aec06acc33f2bb860af5b8dab417479d0a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 33/60] drm/nouveau: allow accelerated buffer moves even when gr isn't present There's no need to avoid using copy engines if gr init fails for some reason (usually missing FW, or incomplete bring-up). It's not terribly useful for an end-user, but it'll slightly speed up suspend/resume when saving fb contents, and allow for host/ce code to be validated. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 213 ++++++++++++++++---------- 1 file changed, 128 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 50025e1ff857..c1e9069bd2ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -282,18 +282,134 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, } static void -nouveau_accel_fini(struct nouveau_drm *drm) +nouveau_accel_ce_fini(struct nouveau_drm *drm) +{ + nouveau_channel_idle(drm->cechan); + nvif_object_fini(&drm->ttm.copy); + nouveau_channel_del(&drm->cechan); +} + +static void +nouveau_accel_ce_init(struct nouveau_drm *drm) +{ + struct nvif_device *device = &drm->client.device; + int ret = 0; + + /* Allocate channel that has access to a (preferably async) copy + * engine, to use for TTM buffer moves. + */ + if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { + ret = nouveau_channel_new(drm, device, + nvif_fifo_runlist_ce(device), 0, + true, &drm->cechan); + } else + if (device->info.chipset >= 0xa3 && + device->info.chipset != 0xaa && + device->info.chipset != 0xac) { + /* Prior to Kepler, there's only a single runlist, so all + * engines can be accessed from any channel. + * + * We still want to use a separate channel though. + */ + ret = nouveau_channel_new(drm, device, NvDmaFB, NvDmaTT, false, + &drm->cechan); + } + + if (ret) + NV_ERROR(drm, "failed to create ce channel, %d\n", ret); +} + +static void +nouveau_accel_gr_fini(struct nouveau_drm *drm) { nouveau_channel_idle(drm->channel); nvif_object_fini(&drm->ntfy); nvkm_gpuobj_del(&drm->notify); nvif_object_fini(&drm->nvsw); nouveau_channel_del(&drm->channel); +} - nouveau_channel_idle(drm->cechan); - nvif_object_fini(&drm->ttm.copy); - nouveau_channel_del(&drm->cechan); +static void +nouveau_accel_gr_init(struct nouveau_drm *drm) +{ + struct nvif_device *device = &drm->client.device; + u32 arg0, arg1; + int ret; + /* Allocate channel that has access to the graphics engine. */ + if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { + arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); + arg1 = 1; + } else { + arg0 = NvDmaFB; + arg1 = NvDmaTT; + } + + ret = nouveau_channel_new(drm, device, arg0, arg1, false, + &drm->channel); + if (ret) { + NV_ERROR(drm, "failed to create kernel channel, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + + /* A SW class is used on pre-NV50 HW to assist with handling the + * synchronisation of page flips, as well as to implement fences + * on TNT/TNT2 HW that lacks any kind of support in host. + */ + if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { + ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, + nouveau_abi16_swclass(drm), NULL, 0, + &drm->nvsw); + if (ret == 0) { + ret = RING_SPACE(drm->channel, 2); + if (ret == 0) { + BEGIN_NV04(drm->channel, NvSubSw, 0, 1); + OUT_RING (drm->channel, drm->nvsw.handle); + } + } + + if (ret) { + NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + } + + /* NvMemoryToMemoryFormat requires a notifier ctxdma for some reason, + * even if notification is never requested, so, allocate a ctxdma on + * any GPU where it's possible we'll end up using M2MF for BO moves. + */ + if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { + ret = nvkm_gpuobj_new(nvxx_device(device), 32, 0, false, NULL, + &drm->notify); + if (ret) { + NV_ERROR(drm, "failed to allocate notifier, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + + ret = nvif_object_init(&drm->channel->user, NvNotify0, + NV_DMA_IN_MEMORY, + &(struct nv_dma_v0) { + .target = NV_DMA_V0_TARGET_VRAM, + .access = NV_DMA_V0_ACCESS_RDWR, + .start = drm->notify->addr, + .limit = drm->notify->addr + 31 + }, sizeof(struct nv_dma_v0), + &drm->ntfy); + if (ret) { + nouveau_accel_gr_fini(drm); + return; + } + } +} + +static void +nouveau_accel_fini(struct nouveau_drm *drm) +{ + nouveau_accel_ce_fini(drm); + nouveau_accel_gr_fini(drm); if (drm->fence) nouveau_fence(drm)->dtor(drm); } @@ -303,23 +419,16 @@ nouveau_accel_init(struct nouveau_drm *drm) { struct nvif_device *device = &drm->client.device; struct nvif_sclass *sclass; - u32 arg0, arg1; int ret, i, n; if (nouveau_noaccel) return; + /* Initialise global support for channels, and synchronisation. */ ret = nouveau_channels_init(drm); if (ret) return; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) { - ret = nvif_user_init(device); - if (ret) - return; - } - - /* initialise synchronisation routines */ /*XXX: this is crap, but the fence/channel stuff is a little * backwards in some places. this will be fixed. */ @@ -366,84 +475,18 @@ nouveau_accel_init(struct nouveau_drm *drm) return; } - if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { - ret = nouveau_channel_new(drm, &drm->client.device, - nvif_fifo_runlist_ce(device), 0, - true, &drm->cechan); + /* Volta requires access to a doorbell register for kickoff. */ + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) { + ret = nvif_user_init(device); if (ret) - NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - - arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); - arg1 = 1; - } else - if (device->info.chipset >= 0xa3 && - device->info.chipset != 0xaa && - device->info.chipset != 0xac) { - ret = nouveau_channel_new(drm, &drm->client.device, - NvDmaFB, NvDmaTT, false, - &drm->cechan); - if (ret) - NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - - arg0 = NvDmaFB; - arg1 = NvDmaTT; - } else { - arg0 = NvDmaFB; - arg1 = NvDmaTT; - } - - ret = nouveau_channel_new(drm, &drm->client.device, - arg0, arg1, false, &drm->channel); - if (ret) { - NV_ERROR(drm, "failed to create kernel channel, %d\n", ret); - nouveau_accel_fini(drm); - return; - } - - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { - ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, - nouveau_abi16_swclass(drm), NULL, 0, - &drm->nvsw); - if (ret == 0) { - ret = RING_SPACE(drm->channel, 2); - if (ret == 0) { - BEGIN_NV04(drm->channel, NvSubSw, 0, 1); - OUT_RING (drm->channel, drm->nvsw.handle); - } - } - - if (ret) { - NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); - nouveau_accel_fini(drm); return; - } - } - - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { - ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0, - false, NULL, &drm->notify); - if (ret) { - NV_ERROR(drm, "failed to allocate notifier, %d\n", ret); - nouveau_accel_fini(drm); - return; - } - - ret = nvif_object_init(&drm->channel->user, NvNotify0, - NV_DMA_IN_MEMORY, - &(struct nv_dma_v0) { - .target = NV_DMA_V0_TARGET_VRAM, - .access = NV_DMA_V0_ACCESS_RDWR, - .start = drm->notify->addr, - .limit = drm->notify->addr + 31 - }, sizeof(struct nv_dma_v0), - &drm->ntfy); - if (ret) { - nouveau_accel_fini(drm); - return; - } } + /* Allocate channels we need to support various functions. */ + nouveau_accel_gr_init(drm); + nouveau_accel_ce_init(drm); + /* Initialise accelerated TTM buffer moves. */ nouveau_bo_move_init(drm); } From eb383e629c6f4e771e0d8869348f68de02c4a1a2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 34/60] drm/nouveau/gr/gf100-: move fecs set_watchdog_timeout method into a function Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 70d3d41e616c..05cdfb7d1f85 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,16 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static void +gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, timeout); + nvkm_wr32(device, 0x409504, 0x00000021); +} + static bool gf100_gr_chsw_load(struct nvkm_gr *base) { @@ -1524,9 +1534,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) ) < 0) return -EBUSY; - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x7fffffff); - nvkm_wr32(device, 0x409504, 0x00000021); + gf100_gr_fecs_set_watchdog_timeout(gr, 0x7fffffff); nvkm_wr32(device, 0x409840, 0xffffffff); nvkm_wr32(device, 0x409500, 0x00000000); From 0b89ca0dc34446b473000f4bc2906a58876d78b0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 35/60] drm/nouveau/gr/gf100-: move fecs discover_image_size into a function Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 05cdfb7d1f85..6c045f26260d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,22 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static int +gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000010); + nvkm_msec(device, 2000, + if ((*psize = nvkm_rd32(device, 0x409800))) + return 0; + ); + + return -ETIMEDOUT; +} + static void gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout) { @@ -1497,6 +1513,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) struct nvkm_device *device = subdev->device; struct nvkm_secboot *sb = device->secboot; u32 secboot_mask = 0; + int ret; /* load fuc microcode */ nvkm_mc_unk260(device, 0); @@ -1536,14 +1553,10 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) gf100_gr_fecs_set_watchdog_timeout(gr, 0x7fffffff); - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000010); - if (nvkm_msec(device, 2000, - if ((gr->size = nvkm_rd32(device, 0x409800))) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store main context image. */ + ret = gf100_gr_fecs_discover_image_size(gr, &gr->size); + if (ret) + return ret; nvkm_wr32(device, 0x409840, 0xffffffff); nvkm_wr32(device, 0x409500, 0x00000000); From 7d3f06881dd07f9358b628d328520eb17cf91eb7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 36/60] drm/nouveau/gr/gf100-: move fecs discover_zcull_image_size into a function Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 28 +++++++++++++------ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 1 + 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 6c045f26260d..b5eecc6707b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,22 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static int +gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000016); + nvkm_msec(device, 2000, + if ((*psize = nvkm_rd32(device, 0x409800))) + return 0; + ); + + return -ETIMEDOUT; +} + static int gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize) { @@ -1558,14 +1574,10 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) if (ret) return ret; - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000016); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store ZCULL image. */ + ret = gf100_gr_fecs_discover_zcull_image_size(gr, &gr->size_zcull); + if (ret) + return ret; nvkm_wr32(device, 0x409840, 0xffffffff); nvkm_wr32(device, 0x409500, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index dc46cf0131db..81bdf658b5cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -128,6 +128,7 @@ struct gf100_gr { struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; + u32 size_zcull; }; int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *, From 8bf2d348bd549b7ebbcd894aa8afa0a407beb081 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 37/60] drm/nouveau/gr/gf100-: move fecs discover_pm_image_size into a function Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 28 +++++++++++++------ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 1 + 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index b5eecc6707b4..28dae3bf5f0f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,22 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static int +gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000025); + nvkm_msec(device, 2000, + if ((*psize = nvkm_rd32(device, 0x409800))) + return 0; + ); + + return -ETIMEDOUT; +} + static int gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize) { @@ -1579,14 +1595,10 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) if (ret) return ret; - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000025); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store PerfMon image. */ + ret = gf100_gr_fecs_discover_pm_image_size(gr, &gr->size_pm); + if (ret) + return ret; if (device->chipset >= 0xe0) { nvkm_wr32(device, 0x409800, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 81bdf658b5cd..8fda19073614 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -129,6 +129,7 @@ struct gf100_gr { u32 size; u32 *data; u32 size_zcull; + u32 size_pm; }; int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *, From 7d51bc85d774c8b8339cf19893661bb2d1a7e78a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 38/60] drm/nouveau/gr/gf100-: move fecs elpg setup into functions Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 113 +++++++++++++----- 1 file changed, 84 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 28dae3bf5f0f..47469f4591d5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,78 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static int +gf100_gr_fecs_set_reglist_virtual_address(struct gf100_gr *gr, u64 addr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409810, addr >> 8); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000032); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) == 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + +static int +gf100_gr_fecs_set_reglist_bind_instance(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409810, inst); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000031); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) == 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + +static int +gf100_gr_fecs_discover_reglist_image_size(struct gf100_gr *gr, u32 *psize) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000030); + nvkm_msec(device, 2000, + if ((*psize = nvkm_rd32(device, 0x409800))) + return 0; + ); + + return -ETIMEDOUT; +} + +static int +gf100_gr_fecs_elpg_bind(struct gf100_gr *gr) +{ + u32 size; + int ret; + + ret = gf100_gr_fecs_discover_reglist_image_size(gr, &size); + if (ret) + return ret; + + /*XXX: We need to allocate + map the above into PMU's inst block, + * which which means we probably need a proper PMU before we + * even bother. + */ + + ret = gf100_gr_fecs_set_reglist_bind_instance(gr, 0); + if (ret) + return ret; + + return gf100_gr_fecs_set_reglist_virtual_address(gr, 0); +} + static int gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize) { @@ -1600,36 +1672,19 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) if (ret) return ret; + /*XXX: We (likely) require PMU support to even bother with this. + * + * Also, it seems like not all GPUs support ELPG. Traces I + * have here show RM enabling it on Kepler/Turing, but none + * of the GPUs between those. NVGPU decides this by PCIID. + */ + if (0) { + ret = gf100_gr_fecs_elpg_bind(gr); + if (ret) + return ret; + } + if (device->chipset >= 0xe0) { - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000030); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x409810, 0xb00095c8); - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000031); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x409810, 0x00080420); - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000032); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; - nvkm_wr32(device, 0x409614, 0x00000070); nvkm_wr32(device, 0x409614, 0x00000770); nvkm_wr32(device, 0x40802c, 0x00000001); From 8c7db7684448c51a406d07461aa74d436fba5b3e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 39/60] drm/nouveau/gr/gf100-: remove some unnecessary reg writes This is already done during golden context creation. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 47469f4591d5..5ab6468b25f4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1684,12 +1684,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) return ret; } - if (device->chipset >= 0xe0) { - nvkm_wr32(device, 0x409614, 0x00000070); - nvkm_wr32(device, 0x409614, 0x00000770); - nvkm_wr32(device, 0x40802c, 0x00000001); - } - + /* Generate golden context image. */ if (gr->data == NULL) { int ret = gf100_grctx_generate(gr); if (ret) { From b7f713b8d313d302588906078b92d919b44d5b75 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 40/60] drm/nouveau/gr/gf100-: move fecs bind_pointer into a function Makes the code somewhat less magic. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 10 +++------- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 19 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 2 ++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index e813a3f8ea93..85f2d1e950e8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1523,13 +1523,9 @@ gf100_grctx_generate(struct gf100_gr *gr) /* Make channel current. */ addr = nvkm_memory_addr(inst) >> 12; if (gr->firmware) { - nvkm_wr32(device, 0x409840, 0x00000030); - nvkm_wr32(device, 0x409500, 0x80000000 | addr); - nvkm_wr32(device, 0x409504, 0x00000003); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800) & 0x00000010) - break; - ); + ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr); + if (ret) + goto done; nvkm_kmap(data); nvkm_wo32(data, 0x1c, 1); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 5ab6468b25f4..0a38007ad17e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,25 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +int +gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0x00000030); + nvkm_wr32(device, 0x409500, inst); + nvkm_wr32(device, 0x409504, 0x00000003); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409800); + if (stat & 0x00000020) + return -EIO; + if (stat & 0x00000010) + return 0; + ); + + return -ETIMEDOUT; +} + static int gf100_gr_fecs_set_reglist_virtual_address(struct gf100_gr *gr, u64 addr) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 8fda19073614..abed68276eff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -138,6 +138,8 @@ int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, int, struct nvkm_gr **); void *gf100_gr_dtor(struct nvkm_gr *); +int gf100_gr_fecs_bind_pointer(struct gf100_gr *, u32 inst); + struct gf100_gr_func_zbc { void (*clear_color)(struct gf100_gr *, int zbc); void (*clear_depth)(struct gf100_gr *, int zbc); From 8e44b987e8f1566bd6670df29f8482748fe8949e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 41/60] drm/nouveau/gr/gf100-: store fecs/gpccs falcon pointers in substructures Future changes will want to add some additional things here, keep them grouped together. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 36 ++++++++++--------- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 10 ++++-- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 0a38007ad17e..bfe740033cf4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1645,12 +1645,12 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS)) secboot_mask |= BIT(NVKM_SECBOOT_FALCON_FECS); else - gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d); + gf100_gr_init_fw(gr->fecs.falcon, &gr->fuc409c, &gr->fuc409d); if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS)) secboot_mask |= BIT(NVKM_SECBOOT_FALCON_GPCCS); else - gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad); + gf100_gr_init_fw(gr->gpccs.falcon, &gr->fuc41ac, &gr->fuc41ad); if (secboot_mask != 0) { int ret = nvkm_secboot_reset(sb, secboot_mask); @@ -1665,8 +1665,8 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) nvkm_wr32(device, 0x41a10c, 0x00000000); nvkm_wr32(device, 0x40910c, 0x00000000); - nvkm_falcon_start(gr->gpccs); - nvkm_falcon_start(gr->fecs); + nvkm_falcon_start(gr->gpccs.falcon); + nvkm_falcon_start(gr->fecs.falcon); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000001) @@ -1728,15 +1728,19 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) /* load HUB microcode */ nvkm_mc_unk260(device, 0); - nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0, + nvkm_falcon_load_dmem(gr->fecs.falcon, + gr->func->fecs.ucode->data.data, 0x0, gr->func->fecs.ucode->data.size, 0); - nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0, + nvkm_falcon_load_imem(gr->fecs.falcon, + gr->func->fecs.ucode->code.data, 0x0, gr->func->fecs.ucode->code.size, 0, 0, false); /* load GPC microcode */ - nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0, + nvkm_falcon_load_dmem(gr->gpccs.falcon, + gr->func->gpccs.ucode->data.data, 0x0, gr->func->gpccs.ucode->data.size, 0); - nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0, + nvkm_falcon_load_imem(gr->gpccs.falcon, + gr->func->gpccs.ucode->code.data, 0x0, gr->func->gpccs.ucode->code.size, 0, 0, false); nvkm_mc_unk260(device, 1); @@ -1883,11 +1887,11 @@ gf100_gr_oneinit(struct nvkm_gr *base) int i, j; int ret; - ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs); + ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs.falcon); if (ret) return ret; - ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs); + ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs.falcon); if (ret) return ret; @@ -1930,11 +1934,11 @@ gf100_gr_init_(struct nvkm_gr *base) nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false); - ret = nvkm_falcon_get(gr->fecs, subdev); + ret = nvkm_falcon_get(gr->fecs.falcon, subdev); if (ret) return ret; - ret = nvkm_falcon_get(gr->gpccs, subdev); + ret = nvkm_falcon_get(gr->gpccs.falcon, subdev); if (ret) return ret; @@ -1946,8 +1950,8 @@ gf100_gr_fini_(struct nvkm_gr *base, bool suspend) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; - nvkm_falcon_put(gr->gpccs, subdev); - nvkm_falcon_put(gr->fecs, subdev); + nvkm_falcon_put(gr->gpccs.falcon, subdev); + nvkm_falcon_put(gr->fecs.falcon, subdev); return 0; } @@ -1973,8 +1977,8 @@ gf100_gr_dtor(struct nvkm_gr *base) gr->func->dtor(gr); kfree(gr->data); - nvkm_falcon_del(&gr->gpccs); - nvkm_falcon_del(&gr->fecs); + nvkm_falcon_del(&gr->gpccs.falcon); + nvkm_falcon_del(&gr->fecs.falcon); gf100_gr_dtor_fw(&gr->fuc409c); gf100_gr_dtor_fw(&gr->fuc409d); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index abed68276eff..2c031af6881a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -82,8 +82,14 @@ struct gf100_gr { const struct gf100_gr_func *func; struct nvkm_gr base; - struct nvkm_falcon *fecs; - struct nvkm_falcon *gpccs; + struct { + struct nvkm_falcon *falcon; + } fecs; + + struct { + struct nvkm_falcon *falcon; + } gpccs; + struct gf100_gr_fuc fuc409c; struct gf100_gr_fuc fuc409d; struct gf100_gr_fuc fuc41ac; From 874c1b56f363472187bdf681d42627d805924833 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 42/60] drm/nouveau/mmu/gf100-: make mmu invalidate function more general Will want to reuse this for fault replay/cancellation swmthds. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c | 12 +++++------- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 7 ++++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index e6a02b568c1b..734e58c1b860 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -186,7 +186,7 @@ int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); void gf100_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); int gf100_vmm_aper(enum nvkm_memory_target); int gf100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); -void gf100_vmm_flush_(struct nvkm_vmm *, int); +void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); void gf100_vmm_flush(struct nvkm_vmm *, int); int gk20a_vmm_aper(enum nvkm_memory_target); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c index faf5a7e9265e..fc8ef801d0fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -178,15 +178,10 @@ gf100_vmm_desc_16_16[] = { }; void -gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) +gf100_vmm_invalidate(struct nvkm_vmm *vmm, u32 type) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; struct nvkm_device *device = subdev->device; - u32 type = depth << 24; - - type = 0x00000001; /* PAGE_ALL */ - if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ mutex_lock(&subdev->mutex); /* Looks like maybe a "free flush slots" counter, the @@ -211,7 +206,10 @@ gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) void gf100_vmm_flush(struct nvkm_vmm *vmm, int depth) { - gf100_vmm_flush_(vmm, 0); + u32 type = 0x00000001; /* PAGE_ALL */ + if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) + type |= 0x00000004; /* HUB_ONLY */ + gf100_vmm_invalidate(vmm, type); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 059fafe0e771..d44653acca8e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -309,7 +309,12 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, void gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) { - gf100_vmm_flush_(vmm, 5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth); + u32 type = (5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth) << 24; + type = 0; /*XXX: need to confirm stuff works with depth enabled... */ + if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) + type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000001; /* PAGE_ALL */ + gf100_vmm_invalidate(vmm, type); } int From d389fd4fa937f46402fa56f784ac8111000cd12e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 12 Feb 2019 22:28:13 +1000 Subject: [PATCH 43/60] drm/nouveau/mmu/gf100-: virtualise setting pdb base address for invalidation It appears that Pascal and newer need something different. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 6 ++++- .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c | 26 ++++++++++++++++++- .../drm/nouveau/nvkm/subdev/mmu/vmmgk104.c | 2 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 2 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgm200.c | 2 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 2 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 9 +++++++ .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 1 + .../drm/nouveau/nvkm/subdev/mmu/vmmgv100.c | 1 + 9 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 734e58c1b860..68474f1f09c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -141,6 +141,8 @@ struct nvkm_vmm_func { struct nvkm_vmm_map *); void (*flush)(struct nvkm_vmm *, int depth); + void (*invalidate_pdb)(struct nvkm_vmm *, u64 addr); + u64 page_block; const struct nvkm_vmm_page page[]; }; @@ -186,8 +188,9 @@ int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); void gf100_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); int gf100_vmm_aper(enum nvkm_memory_target); int gf100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); -void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); void gf100_vmm_flush(struct nvkm_vmm *, int); +void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); +void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gk20a_vmm_aper(enum nvkm_memory_target); @@ -200,6 +203,7 @@ int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void gp100_vmm_flush(struct nvkm_vmm *, int); +void gp100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gv100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c index fc8ef801d0fe..b414413b07d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -177,11 +177,20 @@ gf100_vmm_desc_16_16[] = { {} }; +void +gf100_vmm_invalidate_pdb(struct nvkm_vmm *vmm, u64 addr) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100cb8, addr); +} + void gf100_vmm_invalidate(struct nvkm_vmm *vmm, u32 type) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; struct nvkm_device *device = subdev->device; + struct nvkm_mmu_pt *pd = vmm->pd->pt[0]; + u64 addr = 0; mutex_lock(&subdev->mutex); /* Looks like maybe a "free flush slots" counter, the @@ -192,7 +201,20 @@ gf100_vmm_invalidate(struct nvkm_vmm *vmm, u32 type) break; ); - nvkm_wr32(device, 0x100cb8, vmm->pd->pt[0]->addr >> 8); + if (!(type & 0x00000002) /* ALL_PDB. */) { + switch (nvkm_memory_target(pd->memory)) { + case NVKM_MEM_TARGET_VRAM: addr |= 0x00000000; break; + case NVKM_MEM_TARGET_HOST: addr |= 0x00000002; break; + case NVKM_MEM_TARGET_NCOH: addr |= 0x00000003; break; + default: + WARN_ON(1); + break; + } + addr |= (vmm->pd->pt[0]->addr >> 12) << 4; + + vmm->func->invalidate_pdb(vmm, addr); + } + nvkm_wr32(device, 0x100cbc, 0x80000000 | type); /* Wait for flush to be queued? */ @@ -352,6 +374,7 @@ gf100_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gf100_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, { 12, &gf100_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, @@ -366,6 +389,7 @@ gf100_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gf100_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, { 12, &gf100_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c index 0ebb7bccfcd2..2679068e57ac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c @@ -71,6 +71,7 @@ gk104_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, @@ -85,6 +86,7 @@ gk104_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c index 8086994a0446..5c6f645080ae 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c @@ -40,6 +40,7 @@ gk20a_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xxHC }, { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xxHx }, @@ -54,6 +55,7 @@ gk20a_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xxHC }, { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xxHx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c index a1676a4644fe..dd7f60753f6d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c @@ -113,6 +113,7 @@ gm200_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SVxC }, @@ -128,6 +129,7 @@ gm200_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SVxC }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c index 64d4b6cff8dd..57d763c9629e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c @@ -28,6 +28,7 @@ gm20b_vmm_17 = { .aper = gk20a_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SxHC }, @@ -43,6 +44,7 @@ gm20b_vmm_16 = { .aper = gk20a_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SxHC }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index d44653acca8e..f9c95c43c7a3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -306,6 +306,14 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return 0; } +void +gp100_vmm_invalidate_pdb(struct nvkm_vmm *vmm, u64 addr) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100cb8, lower_32_bits(addr)); + nvkm_wr32(device, 0x100cec, upper_32_bits(addr)); +} + void gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) { @@ -331,6 +339,7 @@ gp100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index 3dcc6bddb32f..13d0d48bab55 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -28,6 +28,7 @@ gp10b_vmm = { .aper = gk20a_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c index 2fa40c16e6d2..8574bc980bbb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -66,6 +66,7 @@ gv100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, From 8e083686ec90b62ed606542aa707ff917fac3259 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 Feb 2019 13:51:18 +0000 Subject: [PATCH 44/60] drm/nouveau/falcon: fix a few indentation issues There are a few statements that are indented incorrectly. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index 771e16a16267..a8bee1e046aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -269,7 +269,7 @@ cmd_write(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *cmd, commit = false; } - cmd_queue_close(priv, queue, commit); + cmd_queue_close(priv, queue, commit); return ret; } @@ -347,7 +347,7 @@ nvkm_msgqueue_post(struct nvkm_msgqueue *priv, enum msgqueue_msg_priority prio, ret = cmd_write(priv, cmd, queue); if (ret) { seq->state = SEQ_STATE_PENDING; - msgqueue_seq_release(priv, seq); + msgqueue_seq_release(priv, seq); } return ret; @@ -373,7 +373,7 @@ msgqueue_msg_handle(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *hdr) if (seq->completion) complete(seq->completion); - msgqueue_seq_release(priv, seq); + msgqueue_seq_release(priv, seq); return 0; } From 169f30b35d66c8d041173ba69e03090fe4caa228 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 1 Feb 2019 13:52:50 +1000 Subject: [PATCH 45/60] drm/nouveau/gr/gf100-: expose fecs methods for pausing ctxsw MMU will need access to these. v2. Apply fix from Rhys Kidd to send correct FECS method for STOP_CTXSW. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/engine/gr.h | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c | 18 +++++++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 54 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h | 4 ++ 5 files changed, 80 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index ba1518ff8b66..87cc7370a216 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -10,6 +10,8 @@ struct nvkm_gr { u64 nvkm_gr_units(struct nvkm_gr *); int nvkm_gr_tlb_flush(struct nvkm_gr *); +int nvkm_gr_ctxsw_pause(struct nvkm_device *); +int nvkm_gr_ctxsw_resume(struct nvkm_device *); int nv04_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int nv10_gr_new(struct nvkm_device *, int, struct nvkm_gr **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c index cd8cf6f7024c..53f62827260b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c @@ -25,6 +25,24 @@ #include +int +nvkm_gr_ctxsw_resume(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.resume) + return gr->func->ctxsw.resume(gr); + return 0; +} + +int +nvkm_gr_ctxsw_pause(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.pause) + return gr->func->ctxsw.pause(gr); + return 0; +} + static bool nvkm_gr_chsw_load(struct nvkm_engine *engine) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index bfe740033cf4..63802edf89d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,56 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static int +gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409804, 0xffffffff); + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0xffffffff); + nvkm_wr32(device, 0x409504, mthd); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409804); + if (stat == 0x00000002) + return -EIO; + if (stat == 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + +int +gf100_gr_fecs_start_ctxsw(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + int ret = 0; + + mutex_lock(&gr->fecs.mutex); + if (!--gr->fecs.disable) { + if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x39))) + gr->fecs.disable++; + } + mutex_unlock(&gr->fecs.mutex); + return ret; +} + +int +gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + int ret = 0; + + mutex_lock(&gr->fecs.mutex); + if (!gr->fecs.disable++) { + if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x38))) + gr->fecs.disable--; + } + mutex_unlock(&gr->fecs.mutex); + return ret; +} + int gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst) { @@ -1891,6 +1941,8 @@ gf100_gr_oneinit(struct nvkm_gr *base) if (ret) return ret; + mutex_init(&gr->fecs.mutex); + ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs.falcon); if (ret) return ret; @@ -2004,6 +2056,8 @@ gf100_gr_ = { .chan_new = gf100_gr_chan_new, .object_get = gf100_gr_object_get, .chsw_load = gf100_gr_chsw_load, + .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, + .ctxsw.resume = gf100_gr_fecs_start_ctxsw, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 2c031af6881a..fafdd0bbea9b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -84,6 +84,8 @@ struct gf100_gr { struct { struct nvkm_falcon *falcon; + struct mutex mutex; + u32 disable; } fecs; struct { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index 66359c23cbce..b8023a6b77a1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -27,6 +27,10 @@ struct nvkm_gr_func { */ u64 (*units)(struct nvkm_gr *); bool (*chsw_load)(struct nvkm_gr *); + struct { + int (*pause)(struct nvkm_gr *); + int (*resume)(struct nvkm_gr *); + } ctxsw; struct nvkm_sclass sclass[]; }; From ae5ea7f6a8117c8615de4203a105ab3de7766def Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 5 Feb 2019 14:54:53 +1000 Subject: [PATCH 46/60] drm/nouveau/gr/gf100-: expose method to determine current context MMU will need access to this info. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c | 9 +++++++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h | 1 + 4 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index 87cc7370a216..1e924c7f7ba7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -12,6 +12,7 @@ u64 nvkm_gr_units(struct nvkm_gr *); int nvkm_gr_tlb_flush(struct nvkm_gr *); int nvkm_gr_ctxsw_pause(struct nvkm_device *); int nvkm_gr_ctxsw_resume(struct nvkm_device *); +u32 nvkm_gr_ctxsw_inst(struct nvkm_device *); int nv04_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int nv10_gr_new(struct nvkm_device *, int, struct nvkm_gr **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c index 53f62827260b..d41fb94524e9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c @@ -25,6 +25,15 @@ #include +u32 +nvkm_gr_ctxsw_inst(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.inst) + return gr->func->ctxsw.inst(gr); + return 0; +} + int nvkm_gr_ctxsw_resume(struct nvkm_device *device) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 63802edf89d4..81a13cf9a292 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,12 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static u32 +gf100_gr_ctxsw_inst(struct nvkm_gr *gr) +{ + return nvkm_rd32(gr->engine.subdev.device, 0x409b00); +} + static int gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd) { @@ -2058,6 +2064,7 @@ gf100_gr_ = { .chsw_load = gf100_gr_chsw_load, .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, .ctxsw.resume = gf100_gr_fecs_start_ctxsw, + .ctxsw.inst = gf100_gr_ctxsw_inst, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index b8023a6b77a1..d4d5601c51e7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -30,6 +30,7 @@ struct nvkm_gr_func { struct { int (*pause)(struct nvkm_gr *); int (*resume)(struct nvkm_gr *); + u32 (*inst)(struct nvkm_gr *); } ctxsw; struct nvkm_sclass sclass[]; }; From 2606f291621eb319726243e0f3893644114277f8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jun 2018 16:25:53 +1000 Subject: [PATCH 47/60] drm/nouveau/mmu: support initialisation of client-managed address-spaces NVKM is currently responsible for managing the allocation of a client's GPU address-space, but there's various use-cases (ie. HMM address-space mirroring) where giving a client more direct control is desirable. This commit allows for a VMM to be created where the area allocated for NVKM is limited to a client-specified window, the remainder of address- space is controlled directly by the client. Leaving a window is necessary to support various internal requirements, but also to support existing allocation interfaces as not all of the HW is capable of working with a HMM allocation. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/if000c.h | 3 +- drivers/gpu/drm/nouveau/include/nvif/vmm.h | 4 +- drivers/gpu/drm/nouveau/nouveau_vmm.c | 2 +- drivers/gpu/drm/nouveau/nvif/vmm.c | 5 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/priv.h | 2 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c | 4 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 83 +++++++++++++++---- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 49 +++++------ .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c | 18 ++-- .../drm/nouveau/nvkm/subdev/mmu/vmmgk104.c | 8 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 8 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgm200.c | 20 ++--- .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 12 +-- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 8 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 8 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgv100.c | 8 +- .../drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c | 8 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c | 15 ++-- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c | 6 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c | 6 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c | 6 +- .../drm/nouveau/nvkm/subdev/mmu/vmmtu102.c | 4 +- 22 files changed, 172 insertions(+), 115 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index 2928ecd989ad..f8e29cfee7f8 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -3,7 +3,8 @@ struct nvif_vmm_v0 { __u8 version; __u8 page_nr; - __u8 pad02[6]; + __u8 managed; + __u8 pad03[5]; __u64 addr; __u64 size; __u8 data[]; diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h index c5db8a2e82df..79bf85d2f43a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -30,8 +30,8 @@ struct nvif_vmm { int page_nr; }; -int nvif_vmm_init(struct nvif_mmu *, s32 oclass, u64 addr, u64 size, - void *argv, u32 argc, struct nvif_vmm *); +int nvif_vmm_init(struct nvif_mmu *, s32 oclass, bool managed, u64 addr, + u64 size, void *argv, u32 argc, struct nvif_vmm *); void nvif_vmm_fini(struct nvif_vmm *); int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, u8 page, u8 align, u64 size, struct nvif_vma *); diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 2032c3e4f6e5..724d02d7c049 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -126,7 +126,7 @@ nouveau_vmm_fini(struct nouveau_vmm *vmm) int nouveau_vmm_init(struct nouveau_cli *cli, s32 oclass, struct nouveau_vmm *vmm) { - int ret = nvif_vmm_init(&cli->mmu, oclass, PAGE_SIZE, 0, NULL, 0, + int ret = nvif_vmm_init(&cli->mmu, oclass, false, PAGE_SIZE, 0, NULL, 0, &vmm->vmm); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 6b9c5776547f..11487c00b909 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -112,8 +112,8 @@ nvif_vmm_fini(struct nvif_vmm *vmm) } int -nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, - void *argv, u32 argc, struct nvif_vmm *vmm) +nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, bool managed, u64 addr, + u64 size, void *argv, u32 argc, struct nvif_vmm *vmm) { struct nvif_vmm_v0 *args; u32 argn = sizeof(*args) + argc; @@ -125,6 +125,7 @@ nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, if (!(args = kmalloc(argn, GFP_KERNEL))) return -ENOMEM; args->version = 0; + args->managed = managed; args->addr = addr; args->size = size; memcpy(args->data, argv, argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h index 948a48c21be4..2ad1102a4e31 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h @@ -28,7 +28,7 @@ struct nvkm_mmu_func { struct { struct nvkm_sclass user; - int (*ctor)(struct nvkm_mmu *, u64 addr, u64 size, + int (*ctor)(struct nvkm_mmu *, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *, const char *name, struct nvkm_vmm **); bool global; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c index 6889076097ec..4b9f07a31219 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -304,8 +304,10 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_uvmm *uvmm; int ret = -ENOSYS; u64 addr, size; + bool managed; if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, more))) { + managed = args->v0.managed != 0; addr = args->v0.addr; size = args->v0.size; } else @@ -317,7 +319,7 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, *pobject = &uvmm->object; if (!mmu->vmm) { - ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, + ret = mmu->func->vmm.ctor(mmu, managed, addr, size, argv, argc, NULL, "user", &uvmm->vmm); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 6b87fff014b3..5274ab0598af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -972,16 +972,32 @@ nvkm_vmm_dtor(struct nvkm_vmm *vmm) } } +static int +nvkm_vmm_ctor_managed(struct nvkm_vmm *vmm, u64 addr, u64 size) +{ + struct nvkm_vma *vma; + if (!(vma = nvkm_vma_new(addr, size))) + return -ENOMEM; + vma->mapref = true; + vma->sparse = false; + vma->used = true; + vma->user = true; + nvkm_vmm_node_insert(vmm, vma); + list_add_tail(&vma->head, &vmm->list); + return 0; +} + int nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *key, - const char *name, struct nvkm_vmm *vmm) + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *key, const char *name, + struct nvkm_vmm *vmm) { static struct lock_class_key _key; const struct nvkm_vmm_page *page = func->page; const struct nvkm_vmm_desc *desc; struct nvkm_vma *vma; - int levels, bits = 0; + int levels, bits = 0, ret; vmm->func = func; vmm->mmu = mmu; @@ -1009,11 +1025,6 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX)) return -EINVAL; - vmm->start = addr; - vmm->limit = size ? (addr + size) : (1ULL << bits); - if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits)) - return -EINVAL; - /* Allocate top-level page table. */ vmm->pd = nvkm_vmm_pt_new(desc, false, NULL); if (!vmm->pd) @@ -1036,22 +1047,61 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, vmm->free = RB_ROOT; vmm->root = RB_ROOT; - if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) - return -ENOMEM; + if (managed) { + /* Address-space will be managed by the client for the most + * part, except for a specified area where NVKM allocations + * are allowed to be placed. + */ + vmm->start = 0; + vmm->limit = 1ULL << bits; + if (addr + size < addr || addr + size > vmm->limit) + return -EINVAL; + + /* Client-managed area before the NVKM-managed area. */ + if (addr && (ret = nvkm_vmm_ctor_managed(vmm, 0, addr))) + return ret; + + /* NVKM-managed area. */ + if (size) { + if (!(vma = nvkm_vma_new(addr, size))) + return -ENOMEM; + nvkm_vmm_free_insert(vmm, vma); + list_add_tail(&vma->head, &vmm->list); + } + + /* Client-managed area after the NVKM-managed area. */ + addr = addr + size; + size = vmm->limit - addr; + if (size && (ret = nvkm_vmm_ctor_managed(vmm, addr, size))) + return ret; + } else { + /* Address-space fully managed by NVKM, requiring calls to + * nvkm_vmm_get()/nvkm_vmm_put() to allocate address-space. + */ + vmm->start = addr; + vmm->limit = size ? (addr + size) : (1ULL << bits); + if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits)) + return -EINVAL; + + if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) + return -ENOMEM; + + nvkm_vmm_free_insert(vmm, vma); + list_add(&vma->head, &vmm->list); + } - nvkm_vmm_free_insert(vmm, vma); - list_add(&vma->head, &vmm->list); return 0; } int nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 hdr, u64 addr, u64 size, struct lock_class_key *key, - const char *name, struct nvkm_vmm **pvmm) + u32 hdr, bool managed, u64 addr, u64 size, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) { if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL))) return -ENOMEM; - return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm); + return nvkm_vmm_ctor(func, mmu, hdr, managed, addr, size, key, name, *pvmm); } void @@ -1584,7 +1634,8 @@ nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv, struct nvkm_mmu *mmu = device->mmu; struct nvkm_vmm *vmm = NULL; int ret; - ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm); + ret = mmu->func->vmm.ctor(mmu, false, addr, size, argv, argc, + key, name, &vmm); if (ret) nvkm_vmm_unref(&vmm); *pvmm = vmm; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 68474f1f09c1..e272e81ac292 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -153,11 +153,12 @@ struct nvkm_vmm_join { }; int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *, - const char *name, struct nvkm_vmm **); + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *, const char *name, + struct nvkm_vmm **); int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *, - const char *name, struct nvkm_vmm *); + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *, const char *name, struct nvkm_vmm *); struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr); struct nvkm_vma *nvkm_vmm_node_split(struct nvkm_vmm *, struct nvkm_vma *, u64 addr, u64 size); @@ -171,7 +172,7 @@ void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, - u64, u64, void *, u32, struct lock_class_key *, + bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); @@ -181,7 +182,7 @@ int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void nv50_vmm_flush(struct nvkm_vmm *, int); int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, - struct nvkm_mmu *, u64, u64, void *, u32, + struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gf100_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); @@ -195,7 +196,7 @@ void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gk20a_vmm_aper(enum nvkm_memory_target); int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, - struct nvkm_mmu *, u64, u64, void *, u32, + struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); @@ -207,46 +208,46 @@ void gp100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gv100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); -int nv04_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv04_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv41_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv41_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv44_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv50_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int mcp77_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int g84_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gf100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gk104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gk104_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gk20a_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gk20a_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm200_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, +int gm200_vmm_new_fixed(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm200_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gm200_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm20b_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, +int gm20b_vmm_new_fixed(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm20b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gm20b_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gp100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gp10b_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gv100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int tu102_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int tu102_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c index b414413b07d4..ab6424faf84c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -400,14 +400,14 @@ gf100_vmm_16 = { int gf100_vmm_new_(const struct nvkm_vmm_func *func_16, const struct nvkm_vmm_func *func_17, - struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { switch (mmu->subdev.device->fb->page) { - case 16: return nv04_vmm_new_(func_16, mmu, 0, addr, size, + case 16: return nv04_vmm_new_(func_16, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); - case 17: return nv04_vmm_new_(func_17, mmu, 0, addr, size, + case 17: return nv04_vmm_new_(func_17, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); default: WARN_ON(1); @@ -416,10 +416,10 @@ gf100_vmm_new_(const struct nvkm_vmm_func *func_16, } int -gf100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gf100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, addr, + return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c index 2679068e57ac..0b59c01fd146 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c @@ -95,10 +95,10 @@ gk104_vmm_16 = { }; int -gk104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gk104_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, addr, + return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c index 5c6f645080ae..5a9582dce970 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c @@ -64,10 +64,10 @@ gk20a_vmm_16 = { }; int -gk20a_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gk20a_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, addr, + return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c index dd7f60753f6d..2e61af02d4d8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c @@ -141,9 +141,9 @@ gm200_vmm_16 = { int gm200_vmm_new_(const struct nvkm_vmm_func *func_16, const struct nvkm_vmm_func *func_17, - struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { const struct nvkm_vmm_func *func; union { @@ -165,23 +165,23 @@ gm200_vmm_new_(const struct nvkm_vmm_func *func_16, } else return ret; - return nvkm_vmm_new_(func, mmu, 0, addr, size, key, name, pvmm); + return nvkm_vmm_new_(func, mmu, 0, managed, addr, size, key, name, pvmm); } int -gm200_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gm200_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } int -gm200_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, +gm200_vmm_new_fixed(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c index 57d763c9629e..96b759695dd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c @@ -54,19 +54,19 @@ gm20b_vmm_16 = { }; int -gm20b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gm20b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } int -gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, +gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index f9c95c43c7a3..4b8ebaa63081 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -352,10 +352,10 @@ gp100_vmm = { }; int -gp100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gp100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp100_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&gp100_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index 13d0d48bab55..07b91d835e18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -41,10 +41,10 @@ gp10b_vmm = { }; int -gp10b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gp10b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp10b_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&gp10b_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c index 8574bc980bbb..da5841aad70a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -79,10 +79,10 @@ gv100_vmm = { }; int -gv100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gv100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gv100_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&gv100_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c index e63d984cbfd4..bdddd99f5877 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c @@ -36,10 +36,10 @@ mcp77_vmm = { }; int -mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +mcp77_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&mcp77_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c index 0cab1ffc9f64..4c6b3b7d221f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c @@ -100,16 +100,17 @@ nv04_vmm = { int nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 pd_header, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + u32 pd_header, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { union { struct nv04_vmm_vn vn; } *args = argv; int ret; - ret = nvkm_vmm_new_(func, mmu, pd_header, addr, size, key, name, pvmm); + ret = nvkm_vmm_new_(func, mmu, pd_header, managed, addr, size, + key, name, pvmm); if (ret) return ret; @@ -117,15 +118,15 @@ nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, } int -nv04_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv04_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { struct nvkm_memory *mem; struct nvkm_vmm *vmm; int ret; - ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, addr, size, + ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, managed, addr, size, argv, argc, key, name, &vmm); *pvmm = vmm; if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c index b595f130e573..1d3369683a21 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c @@ -104,10 +104,10 @@ nv41_vmm = { }; int -nv41_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv41_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&nv41_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&nv41_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c index b834e4352334..a82936ba9890 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c @@ -205,15 +205,15 @@ nv44_vmm = { }; int -nv44_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv44_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { struct nvkm_subdev *subdev = &mmu->subdev; struct nvkm_vmm *vmm; int ret; - ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, addr, size, + ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, &vmm); *pvmm = vmm; if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c index 64f75d906202..c98afe3134ee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -376,10 +376,10 @@ nv50_vmm = { }; int -nv50_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv50_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&nv50_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&nv50_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index 56c630d141da..ddab7c6bfe7c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -68,10 +68,10 @@ tu102_vmm = { }; int -tu102_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, +tu102_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&tu102_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&tu102_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } From 8e68271d7ce472ff75af8cf4988edf1c797253a5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jul 2018 18:29:20 +1000 Subject: [PATCH 48/60] drm/nouveau/mmu: store mapped flag separately from memory pointer This will be used to support a privileged client providing PTEs directly, without a memory object to use as a reference. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 215a672f1cde..da00fafe0c2f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -17,6 +17,7 @@ struct nvkm_vma { bool part:1; /* Region was split from an allocated region by map(). */ bool user:1; /* Region user-allocated. */ bool busy:1; /* Region busy (for temporarily preventing user access). */ + bool mapped:1; /* Region contains valid pages. */ struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ struct nvkm_tags *tags; /* Compression tag reference. */ }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 5274ab0598af..69b61e799fd4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -763,6 +763,7 @@ nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) new->part = vma->part; new->user = vma->user; new->busy = vma->busy; + new->mapped = vma->mapped; list_add(&new->head, &vma->head); return new; } @@ -1112,10 +1113,11 @@ nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); nvkm_memory_unref(&vma->memory); + vma->mapped = false; - if (!vma->part || ((prev = node(vma, prev)), prev->memory)) + if (!vma->part || ((prev = node(vma, prev)), prev->mapped)) prev = NULL; - if (!next->part || next->memory) + if (!next->part || next->mapped) next = NULL; nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size); } @@ -1274,6 +1276,7 @@ nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); nvkm_memory_unref(&vma->memory); vma->memory = nvkm_memory_ref(map->memory); + vma->mapped = true; vma->tags = map->tags; return 0; } @@ -1319,14 +1322,16 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) if (vma->mapref || !vma->sparse) { do { - const bool map = next->memory != NULL; + const bool mem = next->memory != NULL; + const bool map = next->mapped; const u8 refd = next->refd; const u64 addr = next->addr; u64 size = next->size; /* Merge regions that are in the same state. */ while ((next = node(next, next)) && next->part && - (next->memory != NULL) == map && + (next->mapped == map) && + (next->memory != NULL) == mem && (next->refd == refd)) size += next->size; @@ -1351,7 +1356,7 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) */ next = vma; do { - if (next->memory) + if (next->mapped) nvkm_vmm_unmap_region(vmm, next); } while ((next = node(vma, next)) && next->part); From a5ff307fe1f2dfe91253e3c19586643a77b6ce52 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 7 Jul 2018 12:35:48 +1000 Subject: [PATCH 49/60] drm/nouveau/mmu: add a privileged method to directly manage PTEs This provides a somewhat more direct method of manipulating the GPU page tables, which will be required to support SVM. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/if000c.h | 26 ++ .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c | 72 ++++- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 288 ++++++++++++++++-- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 20 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 78 +++++ 6 files changed, 451 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index f8e29cfee7f8..20374882ac8f 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -15,6 +15,8 @@ struct nvif_vmm_v0 { #define NVIF_VMM_V0_PUT 0x02 #define NVIF_VMM_V0_MAP 0x03 #define NVIF_VMM_V0_UNMAP 0x04 +#define NVIF_VMM_V0_PFNMAP 0x05 +#define NVIF_VMM_V0_PFNCLR 0x06 struct nvif_vmm_page_v0 { __u8 version; @@ -62,4 +64,28 @@ struct nvif_vmm_unmap_v0 { __u8 pad01[7]; __u64 addr; }; + +struct nvif_vmm_pfnmap_v0 { + __u8 version; + __u8 page; + __u8 pad02[6]; + __u64 addr; + __u64 size; +#define NVIF_VMM_PFNMAP_V0_ADDR 0xfffffffffffff000ULL +#define NVIF_VMM_PFNMAP_V0_ADDR_SHIFT 12 +#define NVIF_VMM_PFNMAP_V0_APER 0x00000000000000f0ULL +#define NVIF_VMM_PFNMAP_V0_HOST 0x0000000000000000ULL +#define NVIF_VMM_PFNMAP_V0_VRAM 0x0000000000000010ULL +#define NVIF_VMM_PFNMAP_V0_W 0x0000000000000002ULL +#define NVIF_VMM_PFNMAP_V0_V 0x0000000000000001ULL +#define NVIF_VMM_PFNMAP_V0_NONE 0x0000000000000000ULL + __u64 phys[]; +}; + +struct nvif_vmm_pfnclr_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; + __u64 size; +}; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index da00fafe0c2f..8b117cbc8538 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -64,6 +64,7 @@ struct nvkm_vmm_map { struct nvkm_mm_node *mem; struct scatterlist *sgl; dma_addr_t *dma; + u64 *pfn; u64 off; const struct nvkm_vmm_page *page; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c index 4b9f07a31219..b483cda151bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -42,6 +42,69 @@ nvkm_uvmm_search(struct nvkm_client *client, u64 handle) return nvkm_uvmm(object)->vmm; } +static int +nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_pfnclr_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + int ret = -ENOSYS; + u64 addr, size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + size = args->v0.size; + } else + return ret; + + if (!client->super) + return -ENOENT; + + if (size) { + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_pfn_unmap(vmm, addr, size); + mutex_unlock(&vmm->mutex); + } + + return ret; +} + +static int +nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_pfnmap_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + int ret = -ENOSYS; + u64 addr, size, *phys; + u8 page; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + page = args->v0.page; + addr = args->v0.addr; + size = args->v0.size; + phys = args->v0.phys; + if (argc != (size >> page) * sizeof(args->v0.phys[0])) + return -EINVAL; + } else + return ret; + + if (!client->super) + return -ENOENT; + + if (size) { + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_pfn_map(vmm, page, addr, size, phys); + mutex_unlock(&vmm->mutex); + } + + return ret; +} + static int nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { @@ -78,7 +141,7 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto done; } - nvkm_vmm_unmap_locked(vmm, vma); + nvkm_vmm_unmap_locked(vmm, vma, false); ret = 0; done: mutex_unlock(&vmm->mutex); @@ -124,6 +187,11 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto fail; } + if (ret = -EINVAL, vma->mapped && !vma->memory) { + VMM_DEBUG(vmm, "pfnmap %016llx", addr); + goto fail; + } + if (ret = -EINVAL, vma->addr != addr || vma->size != size) { if (addr + size > vma->addr + vma->size || vma->memory || (vma->refd == NVKM_VMA_PAGE_NONE && !vma->mapref)) { @@ -271,6 +339,8 @@ nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) case NVIF_VMM_V0_PUT : return nvkm_uvmm_mthd_put (uvmm, argv, argc); case NVIF_VMM_V0_MAP : return nvkm_uvmm_mthd_map (uvmm, argv, argc); case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc); + case NVIF_VMM_V0_PFNMAP: return nvkm_uvmm_mthd_pfnmap(uvmm, argv, argc); + case NVIF_VMM_V0_PFNCLR: return nvkm_uvmm_mthd_pfnclr(uvmm, argv, argc); default: break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 69b61e799fd4..fa93f964e6a4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -255,11 +255,23 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, } static bool -nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; struct nvkm_vmm_pt *pgt = it->pt[0]; + bool dma; + + if (pfn) { + /* Need to clear PTE valid bits before we dma_unmap_page(). */ + dma = desc->func->pfn_clear(it->vmm, pgt->pt[type], ptei, ptes); + if (dma) { + /* GPU may have cached the PT, flush before unmap. */ + nvkm_vmm_flush_mark(it); + nvkm_vmm_flush(it); + desc->func->pfn_unmap(it->vmm, pgt->pt[type], ptei, ptes); + } + } /* Drop PTE references. */ pgt->refs[type] -= ptes; @@ -349,7 +361,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, } static bool -nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; @@ -379,7 +391,7 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, } static bool -nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { struct nvkm_vmm_pt *pt = it->pt[0]; if (it->desc->type == PGD) @@ -387,14 +399,14 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) else if (it->desc->type == LPT) memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); - return nvkm_vmm_unref_ptes(it, ptei, ptes); + return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes); } static bool -nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes); - return nvkm_vmm_ref_ptes(it, ptei, ptes); + return nvkm_vmm_ref_ptes(it, pfn, ptei, ptes); } static bool @@ -487,8 +499,8 @@ nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) static inline u64 nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, const char *name, bool ref, - bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32), + u64 addr, u64 size, const char *name, bool ref, bool pfn, + bool (*REF_PTES)(struct nvkm_vmm_iter *, bool pfn, u32, u32), nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map, nvkm_vmm_pxe_func CLR_PTES) { @@ -548,7 +560,7 @@ nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, } /* Handle PTE updates. */ - if (!REF_PTES || REF_PTES(&it, ptei, ptes)) { + if (!REF_PTES || REF_PTES(&it, pfn, ptei, ptes)) { struct nvkm_mmu_pt *pt = pgt->pt[type]; if (MAP_PTES || CLR_PTES) { if (MAP_PTES) @@ -590,7 +602,7 @@ static void nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, + nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, false, nvkm_vmm_sparse_unref_ptes, NULL, NULL, page->desc->func->invalid ? page->desc->func->invalid : page->desc->func->unmap); @@ -602,8 +614,8 @@ nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, { if ((page->type & NVKM_VMM_PAGE_SPARSE)) { u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref", - true, nvkm_vmm_sparse_ref_ptes, NULL, - NULL, page->desc->func->sparse); + true, false, nvkm_vmm_sparse_ref_ptes, + NULL, NULL, page->desc->func->sparse); if (fail != ~0ULL) { if ((size = fail - addr)) nvkm_vmm_ptes_sparse_put(vmm, page, addr, size); @@ -666,11 +678,11 @@ nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref) static void nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, bool sparse) + u64 addr, u64 size, bool sparse, bool pfn) { const struct nvkm_vmm_desc_func *func = page->desc->func; nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref", - false, nvkm_vmm_unref_ptes, NULL, NULL, + false, pfn, nvkm_vmm_unref_ptes, NULL, NULL, sparse ? func->sparse : func->invalid ? func->invalid : func->unmap); } @@ -681,10 +693,10 @@ nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, nvkm_vmm_pte_func func) { u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true, - nvkm_vmm_ref_ptes, func, map, NULL); + false, nvkm_vmm_ref_ptes, func, map, NULL); if (fail != ~0ULL) { if ((size = fail - addr)) - nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false); + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false, false); return -ENOMEM; } return 0; @@ -692,10 +704,11 @@ nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, static void nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, bool sparse) + u64 addr, u64 size, bool sparse, bool pfn) { const struct nvkm_vmm_desc_func *func = page->desc->func; - nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL, + nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, pfn, + NULL, NULL, NULL, sparse ? func->sparse : func->invalid ? func->invalid : func->unmap); } @@ -705,7 +718,7 @@ nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size, struct nvkm_vmm_map *map, nvkm_vmm_pte_func func) { - nvkm_vmm_iter(vmm, page, addr, size, "map", false, + nvkm_vmm_iter(vmm, page, addr, size, "map", false, false, NULL, func, map, NULL); } @@ -713,7 +726,7 @@ static void nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - nvkm_vmm_iter(vmm, page, addr, size, "unref", false, + nvkm_vmm_iter(vmm, page, addr, size, "unref", false, false, nvkm_vmm_unref_ptes, NULL, NULL, NULL); } @@ -721,7 +734,7 @@ static int nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, false, nvkm_vmm_ref_ptes, NULL, NULL, NULL); if (fail != ~0ULL) { if (fail != addr) @@ -935,12 +948,41 @@ nvkm_vmm_node_split(struct nvkm_vmm *vmm, return vma; } +static void +nvkm_vma_dump(struct nvkm_vma *vma) +{ + printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c%c %p\n", + vma->addr, (u64)vma->size, + vma->used ? '-' : 'F', + vma->mapref ? 'R' : '-', + vma->sparse ? 'S' : '-', + vma->page != NVKM_VMA_PAGE_NONE ? '0' + vma->page : '-', + vma->refd != NVKM_VMA_PAGE_NONE ? '0' + vma->refd : '-', + vma->part ? 'P' : '-', + vma->user ? 'U' : '-', + vma->busy ? 'B' : '-', + vma->mapped ? 'M' : '-', + vma->memory); +} + +static void +nvkm_vmm_dump(struct nvkm_vmm *vmm) +{ + struct nvkm_vma *vma; + list_for_each_entry(vma, &vmm->list, head) { + nvkm_vma_dump(vma); + } +} + static void nvkm_vmm_dtor(struct nvkm_vmm *vmm) { struct nvkm_vma *vma; struct rb_node *node; + if (0) + nvkm_vmm_dump(vmm); + while ((node = rb_first(&vmm->root))) { struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); nvkm_vmm_put(vmm, &vma); @@ -1105,33 +1147,216 @@ nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, return nvkm_vmm_ctor(func, mmu, hdr, managed, addr, size, key, name, *pvmm); } +static struct nvkm_vma * +nvkm_vmm_pfn_split_merge(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + u64 addr, u64 size, u8 page, bool map) +{ + struct nvkm_vma *prev = NULL; + struct nvkm_vma *next = NULL; + + if (vma->addr == addr && vma->part && (prev = node(vma, prev))) { + if (prev->memory || prev->mapped != map) + prev = NULL; + } + + if (vma->addr + vma->size == addr + size && (next = node(vma, next))) { + if (!next->part || + next->memory || next->mapped != map) + next = NULL; + } + + if (prev || next) + return nvkm_vmm_node_merge(vmm, prev, vma, next, size); + return nvkm_vmm_node_split(vmm, vma, addr, size); +} + +int +nvkm_vmm_pfn_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size) +{ + struct nvkm_vma *vma = nvkm_vmm_node_search(vmm, addr); + struct nvkm_vma *next; + u64 limit = addr + size; + u64 start = addr; + + if (!vma) + return -EINVAL; + + do { + if (!vma->mapped || vma->memory) + continue; + + size = min(limit - start, vma->size - (start - vma->addr)); + + nvkm_vmm_ptes_unmap_put(vmm, &vmm->func->page[vma->refd], + start, size, false, true); + + next = nvkm_vmm_pfn_split_merge(vmm, vma, start, size, 0, false); + if (!WARN_ON(!next)) { + vma = next; + vma->refd = NVKM_VMA_PAGE_NONE; + vma->mapped = false; + } + } while ((vma = node(vma, next)) && (start = vma->addr) < limit); + + return 0; +} + +/*TODO: + * - Avoid PT readback (for dma_unmap etc), this might end up being dealt + * with inside HMM, which would be a lot nicer for us to deal with. + * - Multiple page sizes (particularly for huge page support). + * - Support for systems without a 4KiB page size. + */ +int +nvkm_vmm_pfn_map(struct nvkm_vmm *vmm, u8 shift, u64 addr, u64 size, u64 *pfn) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + struct nvkm_vma *vma, *tmp; + u64 limit = addr + size; + u64 start = addr; + int pm = size >> shift; + int pi = 0; + + /* Only support mapping where the page size of the incoming page + * array matches a page size available for direct mapping. + */ + while (page->shift && page->shift != shift && + page->desc->func->pfn == NULL) + page++; + + if (!page->shift || !IS_ALIGNED(addr, 1ULL << shift) || + !IS_ALIGNED(size, 1ULL << shift) || + addr + size < addr || addr + size > vmm->limit) { + VMM_DEBUG(vmm, "paged map %d %d %016llx %016llx\n", + shift, page->shift, addr, size); + return -EINVAL; + } + + if (!(vma = nvkm_vmm_node_search(vmm, addr))) + return -ENOENT; + + do { + bool map = !!(pfn[pi] & NVKM_VMM_PFN_V); + bool mapped = vma->mapped; + u64 size = limit - start; + u64 addr = start; + int pn, ret = 0; + + /* Narrow the operation window to cover a single action (page + * should be mapped or not) within a single VMA. + */ + for (pn = 0; pi + pn < pm; pn++) { + if (map != !!(pfn[pi + pn] & NVKM_VMM_PFN_V)) + break; + } + size = min_t(u64, size, pn << page->shift); + size = min_t(u64, size, vma->size + vma->addr - addr); + + /* Reject any operation to unmanaged regions, and areas that + * have nvkm_memory objects mapped in them already. + */ + if (!vma->mapref || vma->memory) { + ret = -EINVAL; + goto next; + } + + /* In order to both properly refcount GPU page tables, and + * prevent "normal" mappings and these direct mappings from + * interfering with each other, we need to track contiguous + * ranges that have been mapped with this interface. + * + * Here we attempt to either split an existing VMA so we're + * able to flag the region as either unmapped/mapped, or to + * merge with adjacent VMAs that are already compatible. + * + * If the region is already compatible, nothing is required. + */ + if (map != mapped) { + tmp = nvkm_vmm_pfn_split_merge(vmm, vma, addr, size, + page - + vmm->func->page, map); + if (WARN_ON(!tmp)) { + ret = -ENOMEM; + goto next; + } + + if ((tmp->mapped = map)) + tmp->refd = page - vmm->func->page; + else + tmp->refd = NVKM_VMA_PAGE_NONE; + vma = tmp; + } + + /* Update HW page tables. */ + if (map) { + struct nvkm_vmm_map args; + args.page = page; + args.pfn = &pfn[pi]; + + if (!mapped) { + ret = nvkm_vmm_ptes_get_map(vmm, page, addr, + size, &args, page-> + desc->func->pfn); + } else { + nvkm_vmm_ptes_map(vmm, page, addr, size, &args, + page->desc->func->pfn); + } + } else { + if (mapped) { + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, + false, true); + } + } + +next: + /* Iterate to next operation. */ + if (vma->addr + vma->size == addr + size) + vma = node(vma, next); + start += size; + + if (ret) { + /* Failure is signalled by clearing the valid bit on + * any PFN that couldn't be modified as requested. + */ + while (size) { + pfn[pi++] = NVKM_VMM_PFN_NONE; + size -= 1 << page->shift; + } + } else { + pi += size >> page->shift; + } + } while (vma && start < limit); + + return 0; +} + void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) { - struct nvkm_vma *next = node(vma, next); struct nvkm_vma *prev = NULL; + struct nvkm_vma *next; nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); nvkm_memory_unref(&vma->memory); vma->mapped = false; - if (!vma->part || ((prev = node(vma, prev)), prev->mapped)) + if (vma->part && (prev = node(vma, prev)) && prev->mapped) prev = NULL; - if (!next->part || next->mapped) + if ((next = node(vma, next)) && (!next->part || next->mapped)) next = NULL; nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size); } void -nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, bool pfn) { const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd]; if (vma->mapref) { - nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse); + nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse, pfn); vma->refd = NVKM_VMA_PAGE_NONE; } else { - nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse); + nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse, pfn); } nvkm_vmm_unmap_region(vmm, vma); @@ -1142,7 +1367,7 @@ nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma) { if (vma->memory) { mutex_lock(&vmm->mutex); - nvkm_vmm_unmap_locked(vmm, vma); + nvkm_vmm_unmap_locked(vmm, vma, false); mutex_unlock(&vmm->mutex); } } @@ -1341,7 +1566,8 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) * the page tree. */ nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr, - size, vma->sparse); + size, vma->sparse, + !mem); } else if (refd != NVKM_VMA_PAGE_NONE) { /* Drop allocation-time PTE references. */ @@ -1577,7 +1803,7 @@ nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) } static bool -nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; @@ -1599,7 +1825,7 @@ nvkm_vmm_boot(struct nvkm_vmm *vmm) if (ret) return ret; - nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, + nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, false, nvkm_vmm_boot_ptes, NULL, NULL, NULL); vmm->bootstrapped = true; return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index e272e81ac292..3c74bba03138 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -67,6 +67,10 @@ struct nvkm_vmm_desc_func { nvkm_vmm_pte_func mem; nvkm_vmm_pte_func dma; nvkm_vmm_pte_func sgl; + + nvkm_vmm_pte_func pfn; + bool (*pfn_clear)(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32 ptei, u32 ptes); + nvkm_vmm_pxe_func pfn_unmap; }; extern const struct nvkm_vmm_desc_func gf100_vmm_pgd; @@ -166,8 +170,20 @@ int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref, bool sparse, u8 page, u8 align, u64 size, struct nvkm_vma **pvma); void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *); -void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *); -void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); +void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *, bool pfn); +void nvkm_vmm_unmap_region(struct nvkm_vmm *, struct nvkm_vma *); + +#define NVKM_VMM_PFN_ADDR 0xfffffffffffff000ULL +#define NVKM_VMM_PFN_ADDR_SHIFT 12 +#define NVKM_VMM_PFN_APER 0x00000000000000f0ULL +#define NVKM_VMM_PFN_HOST 0x0000000000000000ULL +#define NVKM_VMM_PFN_VRAM 0x0000000000000010ULL +#define NVKM_VMM_PFN_W 0x0000000000000002ULL +#define NVKM_VMM_PFN_V 0x0000000000000001ULL +#define NVKM_VMM_PFN_NONE 0x0000000000000000ULL + +int nvkm_vmm_pfn_map(struct nvkm_vmm *, u8 page, u64 addr, u64 size, u64 *pfn); +int nvkm_vmm_pfn_unmap(struct nvkm_vmm *, u64 addr, u64 size); struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 4b8ebaa63081..84c855130cb5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -27,6 +27,81 @@ #include #include +static void +gp100_vmm_pfn_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + struct device *dev = vmm->mmu->subdev.device->dev; + dma_addr_t addr; + + nvkm_kmap(pt->memory); + while (ptes--) { + u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 0); + u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 4); + u64 data = (u64)datahi << 32 | datalo; + if ((data & (3ULL << 1)) != 0) { + addr = (data >> 8) << 12; + dma_unmap_page(dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } + ptei++; + } + nvkm_done(pt->memory); +} + +static bool +gp100_vmm_pfn_clear(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + bool dma = false; + nvkm_kmap(pt->memory); + while (ptes--) { + u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 0); + u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 4); + u64 data = (u64)datahi << 32 | datalo; + if ((data & BIT_ULL(0)) && (data & (3ULL << 1)) != 0) { + VMM_WO064(pt, vmm, ptei * 8, data & ~BIT_ULL(0)); + dma = true; + } + ptei++; + } + nvkm_done(pt->memory); + return dma; +} + +static void +gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + struct device *dev = vmm->mmu->subdev.device->dev; + dma_addr_t addr; + + nvkm_kmap(pt->memory); + while (ptes--) { + u64 data = 0; + if (!(*map->pfn & NVKM_VMM_PFN_W)) + data |= BIT_ULL(6); /* RO. */ + + if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) { + addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT; + addr = dma_map_page(dev, pfn_to_page(addr), 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (!WARN_ON(dma_mapping_error(dev, addr))) { + data |= addr >> 4; + data |= 2ULL << 1; /* SYSTEM_COHERENT_MEMORY. */ + data |= BIT_ULL(3); /* VOL. */ + data |= BIT_ULL(0); /* VALID. */ + } + } else { + data |= (*map->pfn & NVKM_VMM_PFN_ADDR) >> 4; + data |= BIT_ULL(0); /* VALID. */ + } + + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->pfn++; + } + nvkm_done(pt->memory); +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) @@ -89,6 +164,9 @@ gp100_vmm_desc_spt = { .mem = gp100_vmm_pgt_mem, .dma = gp100_vmm_pgt_dma, .sgl = gp100_vmm_pgt_sgl, + .pfn = gp100_vmm_pgt_pfn, + .pfn_clear = gp100_vmm_pfn_clear, + .pfn_unmap = gp100_vmm_pfn_unmap, }; static void From 71871aa6df5009ebf39ae94d15d0e9836bf91c03 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Jul 2018 16:07:40 +1000 Subject: [PATCH 50/60] drm/nouveau/mmu/gp100-: add privileged methods for fault replay/cancel Host methods exist to do at least some of what we need, but we are not currently pushing replay/cancels through a channel like UVM does as it's not clear whether it's necessary in our case (UVM also updates PTEs with the GPU). UVM also pushes a software method for fault cancels on Pascal, seemingly because the host methods don't appear to be sufficient. If/when we want to push the replay/cancel on the GPU, we can re-purpose the cancellation code here to implement that swmthd. Keep it simple for now, until we figure out exactly what we need here. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/if000c.h | 1 + drivers/gpu/drm/nouveau/include/nvif/ifc00d.h | 15 ++++ .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c | 7 ++ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 4 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 71 +++++++++++++++++++ .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 1 + .../drm/nouveau/nvkm/subdev/mmu/vmmgv100.c | 1 + .../drm/nouveau/nvkm/subdev/mmu/vmmtu102.c | 1 + 8 files changed, 101 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index 20374882ac8f..d6dd40f21eed 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -17,6 +17,7 @@ struct nvif_vmm_v0 { #define NVIF_VMM_V0_UNMAP 0x04 #define NVIF_VMM_V0_PFNMAP 0x05 #define NVIF_VMM_V0_PFNCLR 0x06 +#define NVIF_VMM_V0_MTHD(i) ((i) + 0x80) struct nvif_vmm_page_v0 { __u8 version; diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h index 1d9c637859f3..33ff6c791643 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h +++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h @@ -18,4 +18,19 @@ struct gp100_vmm_map_v0 { __u8 priv; __u8 kind; }; + +#define GP100_VMM_VN_FAULT_REPLAY NVIF_VMM_V0_MTHD(0x00) +#define GP100_VMM_VN_FAULT_CANCEL NVIF_VMM_V0_MTHD(0x01) + +struct gp100_vmm_fault_replay_vn { +}; + +struct gp100_vmm_fault_cancel_v0 { + __u8 version; + __u8 hub; + __u8 gpc; + __u8 client; + __u8 pad04[4]; + __u64 inst; +}; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c index b483cda151bf..c43b8248c682 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -341,6 +341,13 @@ nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc); case NVIF_VMM_V0_PFNMAP: return nvkm_uvmm_mthd_pfnmap(uvmm, argv, argc); case NVIF_VMM_V0_PFNCLR: return nvkm_uvmm_mthd_pfnclr(uvmm, argv, argc); + case NVIF_VMM_V0_MTHD(0x00) ... NVIF_VMM_V0_MTHD(0x7f): + if (uvmm->vmm->func->mthd) { + return uvmm->vmm->func->mthd(uvmm->vmm, + uvmm->object.client, + mthd, argv, argc); + } + break; default: break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 3c74bba03138..e85f19fdc597 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -145,6 +145,9 @@ struct nvkm_vmm_func { struct nvkm_vmm_map *); void (*flush)(struct nvkm_vmm *, int depth); + int (*mthd)(struct nvkm_vmm *, struct nvkm_client *, + u32 mthd, void *argv, u32 argc); + void (*invalidate_pdb)(struct nvkm_vmm *, u64 addr); u64 page_block; @@ -220,6 +223,7 @@ int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void gp100_vmm_flush(struct nvkm_vmm *, int); +int gp100_vmm_mthd(struct nvkm_vmm *, struct nvkm_client *, u32, void *, u32); void gp100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gv100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 84c855130cb5..af427daf9077 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -21,8 +21,11 @@ */ #include "vmm.h" +#include #include #include +#include +#include #include #include @@ -384,6 +387,73 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return 0; } +static int +gp100_vmm_fault_cancel(struct nvkm_vmm *vmm, void *argv, u32 argc) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + union { + struct gp100_vmm_fault_cancel_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u32 inst, aper; + + if ((ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) + return ret; + + /* Translate MaxwellFaultBufferA instance pointer to the same + * format as the NV_GR_FECS_CURRENT_CTX register. + */ + aper = (args->v0.inst >> 8) & 3; + args->v0.inst >>= 12; + args->v0.inst |= aper << 28; + args->v0.inst |= 0x80000000; + + if (!WARN_ON(nvkm_gr_ctxsw_pause(device))) { + if ((inst = nvkm_gr_ctxsw_inst(device)) == args->v0.inst) { + gf100_vmm_invalidate(vmm, 0x0000001b + /* CANCEL_TARGETED. */ | + (args->v0.hub << 20) | + (args->v0.gpc << 15) | + (args->v0.client << 9)); + } + WARN_ON(nvkm_gr_ctxsw_resume(device)); + } + + return 0; +} + +static int +gp100_vmm_fault_replay(struct nvkm_vmm *vmm, void *argv, u32 argc) +{ + union { + struct gp100_vmm_fault_replay_vn vn; + } *args = argv; + int ret = -ENOSYS; + + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + gf100_vmm_invalidate(vmm, 0x0000000b); /* REPLAY_GLOBAL. */ + } + + return ret; +} + +int +gp100_vmm_mthd(struct nvkm_vmm *vmm, + struct nvkm_client *client, u32 mthd, void *argv, u32 argc) +{ + if (client->super) { + switch (mthd) { + case GP100_VMM_VN_FAULT_REPLAY: + return gp100_vmm_fault_replay(vmm, argv, argc); + case GP100_VMM_VN_FAULT_CANCEL: + return gp100_vmm_fault_cancel(vmm, argv, argc); + default: + break; + } + } + return -EINVAL; +} + void gp100_vmm_invalidate_pdb(struct nvkm_vmm *vmm, u64 addr) { @@ -417,6 +487,7 @@ gp100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index 07b91d835e18..317a83e40def 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -28,6 +28,7 @@ gp10b_vmm = { .aper = gk20a_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c index da5841aad70a..172d685932d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -66,6 +66,7 @@ gv100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index ddab7c6bfe7c..3bfef146dd1f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -56,6 +56,7 @@ tu102_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = tu102_vmm_flush, + .mthd = gp100_vmm_mthd, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, From ab2ee9ffa38ac1bcb7321a615872739e3e240b75 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:48 +1000 Subject: [PATCH 51/60] drm/nouveau/mmu/gp100-: support vmms with gcc/tex replayable faults enabled Some GPU units are capable of supporting "replayable" page faults, where the execution unit will wait for SW to fixup GPU page tables rather than triggering a channel-fatal fault. This feature isn't useful (it's harmful, even) unless something like HMM is being used to manage events appearing in the replayable fault buffer, so, it's disabled by default. This commit allows a client to request it be enabled. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/ifc00d.h | 6 +++ .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 2 + .../gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c | 2 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c | 2 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 3 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 39 +++++++++++++++++-- .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 4 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgv100.c | 4 +- .../drm/nouveau/nvkm/subdev/mmu/vmmtu102.c | 4 +- 10 files changed, 56 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h index 33ff6c791643..4cabd613a280 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h +++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h @@ -6,6 +6,12 @@ struct gp100_vmm_vn { /* nvif_vmm_vX ... */ }; +struct gp100_vmm_v0 { + /* nvif_vmm_vX ... */ + __u8 version; + __u8 fault_replay; +}; + struct gp100_vmm_map_vn { /* nvif_vmm_map_vX ... */ }; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 8b117cbc8538..28ade86f74c5 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -45,6 +45,8 @@ struct nvkm_vmm { dma_addr_t null; void *nullp; + + bool replay; }; int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c index 651b8805c67c..65cb9d28e60e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c @@ -31,7 +31,7 @@ gp100_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c index 3bd3db31e0bb..0a50be9a785a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c @@ -31,7 +31,7 @@ gp10b_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c index f666cb57f69e..e0997eedd6d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c @@ -31,7 +31,7 @@ gv100_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gv100_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gv100_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index e85f19fdc597..5e55ecbd8005 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -220,6 +220,9 @@ int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +int gp100_vmm_new_(const struct nvkm_vmm_func *, + struct nvkm_mmu *, bool, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void gp100_vmm_flush(struct nvkm_vmm *, int); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index af427daf9077..b4f519768d5e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -476,7 +476,11 @@ gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) int gp100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { - const u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11); /* 64KiB */ + u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11) /* 64KiB */; + if (vmm->replay) { + base |= BIT_ULL(4); /* FAULT_REPLAY_TEX */ + base |= BIT_ULL(5); /* FAULT_REPLAY_GCC */ + } return gf100_vmm_join_(vmm, inst, base); } @@ -500,11 +504,40 @@ gp100_vmm = { } }; +int +gp100_vmm_new_(const struct nvkm_vmm_func *func, + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + union { + struct gp100_vmm_vn vn; + struct gp100_vmm_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool replay; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + replay = args->v0.fault_replay != 0; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + replay = false; + } else + return ret; + + ret = nvkm_vmm_new_(func, mmu, 0, managed, addr, size, key, name, pvmm); + if (ret) + return ret; + + (*pvmm)->replay = replay; + return 0; +} + int gp100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp100_vmm, mmu, 0, managed, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gp100_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index 317a83e40def..e081239afe58 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -46,6 +46,6 @@ gp10b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp10b_vmm, mmu, 0, managed, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gp10b_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c index 172d685932d0..f0e21f63253a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -84,6 +84,6 @@ gv100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gv100_vmm, mmu, 0, managed, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gv100_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index 3bfef146dd1f..be91cffc3b52 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -73,6 +73,6 @@ tu102_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&tu102_vmm, mmu, 0, managed, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&tu102_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } From 13e957290647eafe75cfa46a5f1aaa2282d9a09c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:48 +1000 Subject: [PATCH 52/60] drm/nouveau/fault/gp100: expose MaxwellFaultBufferA This nvclass exposes the replayable fault buffer, which will be used by SVM to manage GPU page faults. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/class.h | 2 + drivers/gpu/drm/nouveau/include/nvif/clb069.h | 12 ++ .../drm/nouveau/include/nvkm/subdev/fault.h | 2 + .../gpu/drm/nouveau/nvkm/engine/device/user.c | 15 ++- .../gpu/drm/nouveau/nvkm/subdev/fault/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/fault/base.c | 2 + .../gpu/drm/nouveau/nvkm/subdev/fault/gp100.c | 3 + .../gpu/drm/nouveau/nvkm/subdev/fault/priv.h | 7 ++ .../gpu/drm/nouveau/nvkm/subdev/fault/user.c | 106 ++++++++++++++++++ 9 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/include/nvif/clb069.h create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 214cb6ff93cd..6c2cbb139318 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -54,6 +54,8 @@ #define VOLTA_USERMODE_A 0x0000c361 +#define MAXWELL_FAULT_BUFFER_A /* clb069.h */ 0x0000b069 + #define NV03_CHANNEL_DMA /* cl506b.h */ 0x0000006b #define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e #define NV17_CHANNEL_DMA /* cl506b.h */ 0x0000176e diff --git a/drivers/gpu/drm/nouveau/include/nvif/clb069.h b/drivers/gpu/drm/nouveau/include/nvif/clb069.h new file mode 100644 index 000000000000..eef5d0227bab --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/clb069.h @@ -0,0 +1,12 @@ +#ifndef __NVIF_CLB069_H__ +#define __NVIF_CLB069_H__ +struct nvif_clb069_v0 { + __u8 version; + __u8 pad01[3]; + __u32 entries; + __u32 get; + __u32 put; +}; + +#define NVB069_V0_NTFY_FAULT 0x00 +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h index 1cb465acbb4b..97322f95b3ee 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h @@ -13,6 +13,8 @@ struct nvkm_fault { struct nvkm_event event; struct nvkm_notify nrpfb; + + struct nvkm_device_oclass user; }; struct nvkm_fault_data { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 092ddc4ffefa..03c6d9aef075 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -365,16 +365,15 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index, } if (!sclass) { - switch (index) { - case 0: sclass = &nvkm_control_oclass; break; - case 1: - if (!device->mmu) - return -EINVAL; + if (index-- == 0) + sclass = &nvkm_control_oclass; + else if (device->mmu && index-- == 0) sclass = &device->mmu->user; - break; - default: + else if (device->fault && index-- == 0) + sclass = &device->fault->user; + else return -EINVAL; - } + oclass->base = sclass->base; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild index 801fd5c55945..42586267fc08 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/fault/base.o +nvkm-y += nvkm/subdev/fault/user.o nvkm-y += nvkm/subdev/fault/gp100.o nvkm-y += nvkm/subdev/fault/gv100.o nvkm-y += nvkm/subdev/fault/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c index 4ba1e21e8fda..ca251560d3e0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c @@ -176,5 +176,7 @@ nvkm_fault_new_(const struct nvkm_fault_func *func, struct nvkm_device *device, return -ENOMEM; nvkm_subdev_ctor(&nvkm_fault, device, index, &fault->subdev); fault->func = func; + fault->user.ctor = nvkm_ufault_new; + fault->user.base = func->user.base; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c index 8fb96fe614f9..4f3c4e091117 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c @@ -23,6 +23,8 @@ #include +#include + static void gp100_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) { @@ -69,6 +71,7 @@ gp100_fault = { .buffer.init = gp100_fault_buffer_init, .buffer.fini = gp100_fault_buffer_fini, .buffer.intr = gp100_fault_buffer_intr, + .user = { { 0, 0, MAXWELL_FAULT_BUFFER_A }, 0 }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h index 8ca8b2876dad..975e66ac6344 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h @@ -34,7 +34,14 @@ struct nvkm_fault_func { void (*fini)(struct nvkm_fault_buffer *); void (*intr)(struct nvkm_fault_buffer *, bool enable); } buffer; + struct { + struct nvkm_sclass base; + int rp; + } user; }; int gv100_fault_oneinit(struct nvkm_fault *); + +int nvkm_ufault_new(struct nvkm_device *, const struct nvkm_oclass *, + void *, u32, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c new file mode 100644 index 000000000000..ac835c9582fd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c @@ -0,0 +1,106 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include +#include + +#include +#include + +static int +nvkm_ufault_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + struct nvkm_device *device = buffer->fault->subdev.device; + *type = NVKM_OBJECT_MAP_IO; + *addr = device->func->resource_addr(device, 3) + buffer->addr; + *size = nvkm_memory_size(buffer->mem); + return 0; +} + +static int +nvkm_ufault_ntfy(struct nvkm_object *object, u32 type, + struct nvkm_event **pevent) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + if (type == NVB069_V0_NTFY_FAULT) { + *pevent = &buffer->fault->event; + return 0; + } + return -EINVAL; +} + +static int +nvkm_ufault_fini(struct nvkm_object *object, bool suspend) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + buffer->fault->func->buffer.fini(buffer); + return 0; +} + +static int +nvkm_ufault_init(struct nvkm_object *object) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + buffer->fault->func->buffer.init(buffer); + return 0; +} + +static void * +nvkm_ufault_dtor(struct nvkm_object *object) +{ + return NULL; +} + +static const struct nvkm_object_func +nvkm_ufault = { + .dtor = nvkm_ufault_dtor, + .init = nvkm_ufault_init, + .fini = nvkm_ufault_fini, + .ntfy = nvkm_ufault_ntfy, + .map = nvkm_ufault_map, +}; + +int +nvkm_ufault_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + union { + struct nvif_clb069_v0 v0; + } *args = argv; + struct nvkm_fault *fault = device->fault; + struct nvkm_fault_buffer *buffer = fault->buffer[fault->func->user.rp]; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + args->v0.entries = buffer->entries; + args->v0.get = buffer->get; + args->v0.put = buffer->put; + } else + return ret; + + nvkm_object_ctor(&nvkm_ufault, oclass, &buffer->object); + *pobject = &buffer->object; + return 0; +} From a261a20c0177fa08886cf7941791aaabe48d065d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:48 +1000 Subject: [PATCH 53/60] drm/nouveau/fault/gv100-: expose VoltaFaultBufferA This nvclass exposes the replayable fault buffer, which will be used by SVM to manage GPU page faults. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/class.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/fault/gv100.c | 16 ++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/fault/tu102.c | 1 + 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 6c2cbb139318..7d556a1c92fa 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -55,6 +55,7 @@ #define VOLTA_USERMODE_A 0x0000c361 #define MAXWELL_FAULT_BUFFER_A /* clb069.h */ 0x0000b069 +#define VOLTA_FAULT_BUFFER_A /* clb069.h */ 0x0000c369 #define NV03_CHANNEL_DMA /* cl506b.h */ 0x0000006b #define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c index 6fc54e17c935..6747f09c2dc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c @@ -25,6 +25,8 @@ #include #include +#include + static void gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer) { @@ -166,6 +168,13 @@ gv100_fault_intr(struct nvkm_fault *fault) } } + if (stat & 0x08000000) { + if (fault->buffer[1]) { + nvkm_event_send(&fault->event, 1, 1, NULL, 0); + stat &= ~0x08000000; + } + } + if (stat) { nvkm_debug(subdev, "intr %08x\n", stat); } @@ -208,6 +217,13 @@ gv100_fault = { .buffer.init = gv100_fault_buffer_init, .buffer.fini = gv100_fault_buffer_fini, .buffer.intr = gv100_fault_buffer_intr, + /*TODO: Figure out how to expose non-replayable fault buffer, which, + * for some reason, is where recoverable CE faults appear... + * + * It's a bit tricky, as both NVKM and SVM will need access to + * the non-replayable fault buffer. + */ + .user = { { 0, 0, VOLTA_FAULT_BUFFER_A }, 1 }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c index 912425e6238d..fa1dfe5692b0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c @@ -157,6 +157,7 @@ tu102_fault = { .buffer.init = tu102_fault_buffer_init, .buffer.fini = tu102_fault_buffer_fini, .buffer.intr = tu102_fault_buffer_intr, + .user = { { 0, 0, VOLTA_FAULT_BUFFER_A }, 1 }, }; int From bfe91afaca59251fbf5d62143fdd8f740b551302 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Feb 2019 17:21:48 +1000 Subject: [PATCH 54/60] drm/nouveau: prepare for enabling svm with existing userspace interfaces For a channel to make use of SVM features, it requires a different GPU MMU configuration than we would normally use, which is not desirable to switch to unless a client is actively going to use SVM. In order to supporting SVM without more extensive changes to the userspace interfaces, the SVM_INIT ioctl needs to replace the previous configuration safely. The only way we can currently do this safely, accounting for some unlikely failure conditions, is to allocate the new VMM without destroying the last one, and prioritising the SVM-enabled configuration in the code that cares. This will get cleaned up again further down the track. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 3 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_chan.c | 23 +++++++------ drivers/gpu/drm/nouveau/nouveau_chan.h | 1 + drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/nouveau/nouveau_drv.h | 1 + drivers/gpu/drm/nouveau/nouveau_fbcon.c | 2 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 43 +++++++++++++------------ drivers/gpu/drm/nouveau/nv84_fence.c | 3 +- 9 files changed, 42 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index a95ec3783f39..c3fd5dd39ed9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -339,7 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) goto done; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(chan->ntfy, &cli->vmm, &chan->ntfy_vma); + ret = nouveau_vma_new(chan->ntfy, chan->chan->vmm, + &chan->ntfy_vma); if (ret) goto done; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index a72be71c45b4..34a998012bf6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -194,7 +194,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; struct nvif_mmu *mmu = &cli->mmu; - struct nvif_vmm *vmm = &cli->vmm.vmm; + struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm; size_t acc_size; int type = ttm_bo_type_device; int ret, i, pi = -1; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 668afbc29c3e..5160a0a9c77d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -130,6 +130,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->device = device; chan->drm = drm; + chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm; atomic_set(&chan->killed, 0); /* allocate memory for dma push buffer */ @@ -157,7 +158,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->push.addr = chan->push.buffer->bo.offset; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(chan->push.buffer, &cli->vmm, + ret = nouveau_vma_new(chan->push.buffer, chan->vmm, &chan->push.vma); if (ret) { nouveau_channel_del(pchan); @@ -172,7 +173,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) { if (device->info.family == NV_DEVICE_INFO_V0_TNT) { @@ -202,7 +203,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } } @@ -220,7 +221,6 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u64 runlist, bool priv, struct nouveau_channel **pchan) { - struct nouveau_cli *cli = (void *)device->object.client; static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A, VOLTA_CHANNEL_GPFIFO_A, PASCAL_CHANNEL_GPFIFO_A, @@ -255,7 +255,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.volta.ilength = 0x02000; args.volta.ioffset = 0x10000 + chan->push.addr; args.volta.runlist = runlist; - args.volta.vmm = nvif_handle(&cli->vmm.vmm.object); + args.volta.vmm = nvif_handle(&chan->vmm->vmm.object); args.volta.priv = priv; size = sizeof(args.volta); } else @@ -264,7 +264,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.kepler.ilength = 0x02000; args.kepler.ioffset = 0x10000 + chan->push.addr; args.kepler.runlist = runlist; - args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); + args.kepler.vmm = nvif_handle(&chan->vmm->vmm.object); args.kepler.priv = priv; size = sizeof(args.kepler); } else @@ -272,14 +272,14 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.fermi.version = 0; args.fermi.ilength = 0x02000; args.fermi.ioffset = 0x10000 + chan->push.addr; - args.fermi.vmm = nvif_handle(&cli->vmm.vmm.object); + args.fermi.vmm = nvif_handle(&chan->vmm->vmm.object); size = sizeof(args.fermi); } else { args.nv50.version = 0; args.nv50.ilength = 0x02000; args.nv50.ioffset = 0x10000 + chan->push.addr; args.nv50.pushbuf = nvif_handle(&chan->push.ctxdma); - args.nv50.vmm = nvif_handle(&cli->vmm.vmm.object); + args.nv50.vmm = nvif_handle(&chan->vmm->vmm.object); size = sizeof(args.nv50); } @@ -350,7 +350,6 @@ static int nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) { struct nvif_device *device = chan->device; - struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_drm *drm = chan->drm; struct nv_dma_v0 args = {}; int ret, i; @@ -376,7 +375,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else { args.target = NV_DMA_V0_TARGET_VRAM; args.access = NV_DMA_V0_ACCESS_RDWR; @@ -393,7 +392,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else if (chan->drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; @@ -405,7 +404,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } ret = nvif_object_init(&chan->user, gart, NV_DMA_IN_MEMORY, diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 28418f4e5748..93814d1d31e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -8,6 +8,7 @@ struct nvif_device; struct nouveau_channel { struct nvif_device *device; struct nouveau_drm *drm; + struct nouveau_vmm *vmm; int chid; u64 inst; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index c1e9069bd2ce..72755d4c3983 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -172,6 +172,7 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); + nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->vmm); nvif_mmu_fini(&cli->mmu); nvif_device_fini(&cli->device); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 0ea79c047d8c..8fcff0a4800e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -96,6 +96,7 @@ struct nouveau_cli { struct nvif_device device; struct nvif_mmu mmu; struct nouveau_vmm vmm; + struct nouveau_vmm svm; const struct nvif_mclass *mem; struct list_head head; diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index d275418edd24..0d3cd4e05728 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -353,7 +353,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, chan = nouveau_nofbaccel ? NULL : drm->channel; if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(nvbo, &drm->client.vmm, &fb->vma); + ret = nouveau_vma_new(nvbo, chan->vmm, &fb->vma); if (ret) { NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); chan = NULL; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index fb028e3b5f51..b4bda716564d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -68,10 +68,11 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; + struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm; struct nouveau_vma *vma; int ret; - if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); @@ -82,7 +83,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) if (ret < 0 && ret != -EACCES) goto out; - ret = nouveau_vma_new(nvbo, &cli->vmm, &vma); + ret = nouveau_vma_new(nvbo, vmm, &vma); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); out: @@ -142,17 +143,18 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; + struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm; struct nouveau_vma *vma; int ret; - if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return; - vma = nouveau_vma_find(nvbo, &cli->vmm); + vma = nouveau_vma_find(nvbo, vmm); if (vma) { if (--vma->refs == 0) { ret = pm_runtime_get_sync(dev); @@ -219,6 +221,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm; struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) @@ -228,8 +231,8 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; rep->offset = nvbo->bo.offset; - if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - vma = nouveau_vma_find(nvbo, &cli->vmm); + if (vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + vma = nouveau_vma_find(nvbo, vmm); if (!vma) return -EINVAL; @@ -321,7 +324,8 @@ struct validate_op { }; static void -validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, +validate_fini_no_ticket(struct validate_op *op, struct nouveau_channel *chan, + struct nouveau_fence *fence, struct drm_nouveau_gem_pushbuf_bo *pbbo) { struct nouveau_bo *nvbo; @@ -332,13 +336,11 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, b = &pbbo[nvbo->pbbo_index]; if (likely(fence)) { - struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nouveau_vma *vma; - nouveau_bo_fence(nvbo, fence, !!b->write_domains); - if (drm->client.vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - vma = (void *)(unsigned long)b->user_priv; + if (chan->vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + struct nouveau_vma *vma = + (void *)(unsigned long)b->user_priv; nouveau_fence_unref(&vma->fence); dma_fence_get(&fence->base); vma->fence = fence; @@ -358,10 +360,11 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, } static void -validate_fini(struct validate_op *op, struct nouveau_fence *fence, +validate_fini(struct validate_op *op, struct nouveau_channel *chan, + struct nouveau_fence *fence, struct drm_nouveau_gem_pushbuf_bo *pbbo) { - validate_fini_no_ticket(op, fence, pbbo); + validate_fini_no_ticket(op, chan, fence, pbbo); ww_acquire_fini(&op->ticket); } @@ -416,7 +419,7 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv, list_splice_tail_init(&vram_list, &op->list); list_splice_tail_init(&gart_list, &op->list); list_splice_tail_init(&both_list, &op->list); - validate_fini_no_ticket(op, NULL, NULL); + validate_fini_no_ticket(op, chan, NULL, NULL); if (unlikely(ret == -EDEADLK)) { ret = ttm_bo_reserve_slowpath(&nvbo->bo, true, &op->ticket); @@ -430,8 +433,8 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv, } } - if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - struct nouveau_vmm *vmm = &cli->vmm; + if (chan->vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + struct nouveau_vmm *vmm = chan->vmm; struct nouveau_vma *vma = nouveau_vma_find(nvbo, vmm); if (!vma) { NV_PRINTK(err, cli, "vma not found!\n"); @@ -471,7 +474,7 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv, list_splice_tail(&gart_list, &op->list); list_splice_tail(&both_list, &op->list); if (ret) - validate_fini(op, NULL, NULL); + validate_fini(op, chan, NULL, NULL); return ret; } @@ -563,7 +566,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, if (unlikely(ret < 0)) { if (ret != -ERESTARTSYS) NV_PRINTK(err, cli, "validating bo list\n"); - validate_fini(op, NULL, NULL); + validate_fini(op, chan, NULL, NULL); return ret; } *apply_relocs = ret; @@ -842,7 +845,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } out: - validate_fini(&op, fence, bo); + validate_fini(&op, chan, fence, bo); nouveau_fence_unref(&fence); out_prevalid: diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index e721bb2163a0..f07da00f285f 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -109,7 +109,6 @@ nv84_fence_context_del(struct nouveau_channel *chan) int nv84_fence_context_new(struct nouveau_channel *chan) { - struct nouveau_cli *cli = (void *)chan->user.client; struct nv84_fence_priv *priv = chan->drm->fence; struct nv84_fence_chan *fctx; int ret; @@ -127,7 +126,7 @@ nv84_fence_context_new(struct nouveau_channel *chan) fctx->base.sequence = nv84_fence_read(chan); mutex_lock(&priv->mutex); - ret = nouveau_vma_new(priv->bo, &cli->vmm, &fctx->vma); + ret = nouveau_vma_new(priv->bo, chan->vmm, &fctx->vma); mutex_unlock(&priv->mutex); if (ret) From eeaf06ac1a5584e41cf289f8351e446bb131374b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Jul 2018 12:57:12 +1000 Subject: [PATCH 55/60] drm/nouveau/svm: initial support for shared virtual memory This uses HMM to mirror a process' CPU page tables into a channel's page tables, and keep them synchronised so that both the CPU and GPU are able to access the same memory at the same virtual address. While this code also supports Volta/Turing, it's only enabled for Pascal GPUs currently due to channel recovery being unreliable right now on the later GPUs. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/Kbuild | 1 + drivers/gpu/drm/nouveau/Kconfig | 11 + drivers/gpu/drm/nouveau/nouveau_chan.c | 9 + drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 + drivers/gpu/drm/nouveau/nouveau_svm.c | 737 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_svm.h | 41 ++ drivers/gpu/drm/nouveau/nouveau_vmm.c | 2 + drivers/gpu/drm/nouveau/nouveau_vmm.h | 1 + include/uapi/drm/nouveau_drm.h | 8 + 10 files changed, 818 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nouveau_svm.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_svm.h diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index b17843dd050d..71bd48aafe64 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -30,6 +30,7 @@ nouveau-y += nouveau_vga.o # DRM - memory management nouveau-y += nouveau_bo.o nouveau-y += nouveau_gem.o +nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_svm.o nouveau-y += nouveau_mem.o nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 432c440223bb..e7b26a6f386f 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -71,3 +71,14 @@ config DRM_NOUVEAU_BACKLIGHT help Say Y here if you want to control the backlight of your display (e.g. a laptop panel). + +config DRM_NOUVEAU_SVM + bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" + depends on ARCH_HAS_HMM + depends on DRM_NOUVEAU + depends on STAGING + select HMM_MIRROR + default n + help + Say Y here if you want to enable experimental support for + Shared Virtual Memory (SVM). diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 5160a0a9c77d..282fd90b65e1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -42,6 +42,7 @@ #include "nouveau_fence.h" #include "nouveau_abi16.h" #include "nouveau_vmm.h" +#include "nouveau_svm.h" MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; @@ -95,6 +96,10 @@ nouveau_channel_del(struct nouveau_channel **pchan) if (chan->fence) nouveau_fence(chan->drm)->context_del(chan); + + if (cli) + nouveau_svmm_part(chan->vmm->svmm, chan->inst); + nvif_object_fini(&chan->nvsw); nvif_object_fini(&chan->gart); nvif_object_fini(&chan->vram); @@ -494,6 +499,10 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device, nouveau_channel_del(pchan); } + ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst); + if (ret) + nouveau_channel_del(pchan); + done: cli->base.super = super; return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 72755d4c3983..53fdfa283ee2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -62,6 +62,7 @@ #include "nouveau_usif.h" #include "nouveau_connector.h" #include "nouveau_platform.h" +#include "nouveau_svm.h" MODULE_PARM_DESC(config, "option string to pass to driver core"); static char *nouveau_config; @@ -549,6 +550,7 @@ nouveau_drm_device_init(struct drm_device *dev) nouveau_debugfs_init(drm); nouveau_hwmon_init(dev); + nouveau_svm_init(drm); nouveau_fbcon_init(dev); nouveau_led_init(dev); @@ -592,6 +594,7 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_led_fini(dev); nouveau_fbcon_fini(dev); + nouveau_svm_fini(drm); nouveau_hwmon_fini(dev); nouveau_debugfs_fini(drm); @@ -737,6 +740,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime) struct nouveau_drm *drm = nouveau_drm(dev); int ret; + nouveau_svm_suspend(drm); nouveau_led_suspend(dev); if (dev->mode_config.num_crtc) { @@ -813,7 +817,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime) } nouveau_led_resume(dev); - + nouveau_svm_resume(drm); return 0; } @@ -1033,6 +1037,7 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8fcff0a4800e..1326b42f75e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -210,6 +210,8 @@ struct nouveau_drm { bool have_disp_power_ref; struct dev_pm_domain vga_pm_domain; + + struct nouveau_svm *svm; }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c new file mode 100644 index 000000000000..6158e99b1dc3 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -0,0 +1,737 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nouveau_svm.h" +#include "nouveau_drv.h" +#include "nouveau_chan.h" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +struct nouveau_svm { + struct nouveau_drm *drm; + struct mutex mutex; + struct list_head inst; + + struct nouveau_svm_fault_buffer { + int id; + struct nvif_object object; + u32 entries; + u32 getaddr; + u32 putaddr; + u32 get; + u32 put; + struct nvif_notify notify; + + struct nouveau_svm_fault { + u64 inst; + u64 addr; + u64 time; + u32 engine; + u8 gpc; + u8 hub; + u8 access; + u8 client; + u8 fault; + struct nouveau_svmm *svmm; + } **fault; + int fault_nr; + } buffer[1]; +}; + +#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a) +#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a) + +struct nouveau_ivmm { + struct nouveau_svmm *svmm; + u64 inst; + struct list_head head; +}; + +static struct nouveau_ivmm * +nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + list_for_each_entry(ivmm, &svm->inst, head) { + if (ivmm->inst == inst) + return ivmm; + } + return NULL; +} + +struct nouveau_svmm { + struct nouveau_vmm *vmm; + struct { + unsigned long start; + unsigned long limit; + } unmanaged; + + struct mutex mutex; + + struct mm_struct *mm; + struct hmm_mirror mirror; +}; + +#define SVMM_DBG(s,f,a...) \ + NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) +#define SVMM_ERR(s,f,a...) \ + NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) + +/* Unlink channel instance from SVMM. */ +void +nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + if (svmm) { + mutex_lock(&svmm->vmm->cli->drm->svm->mutex); + ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst); + if (ivmm) { + list_del(&ivmm->head); + kfree(ivmm); + } + mutex_unlock(&svmm->vmm->cli->drm->svm->mutex); + } +} + +/* Link channel instance to SVMM. */ +int +nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + if (svmm) { + if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL))) + return -ENOMEM; + ivmm->svmm = svmm; + ivmm->inst = inst; + + mutex_lock(&svmm->vmm->cli->drm->svm->mutex); + list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst); + mutex_unlock(&svmm->vmm->cli->drm->svm->mutex); + } + return 0; +} + +/* Invalidate SVMM address-range on GPU. */ +static void +nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) +{ + if (limit > start) { + bool super = svmm->vmm->vmm.object.client->super; + svmm->vmm->vmm.object.client->super = true; + nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, + &(struct nvif_vmm_pfnclr_v0) { + .addr = start, + .size = limit - start, + }, sizeof(struct nvif_vmm_pfnclr_v0)); + svmm->vmm->vmm.object.client->super = super; + } +} + +static int +nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, + const struct hmm_update *update) +{ + struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); + unsigned long start = update->start; + unsigned long limit = update->end; + + if (!update->blockable) + return -EAGAIN; + + SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); + + mutex_lock(&svmm->mutex); + if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { + if (start < svmm->unmanaged.start) { + nouveau_svmm_invalidate(svmm, start, + svmm->unmanaged.limit); + } + start = svmm->unmanaged.limit; + } + + nouveau_svmm_invalidate(svmm, start, limit); + mutex_unlock(&svmm->mutex); + return 0; +} + +static void +nouveau_svmm_release(struct hmm_mirror *mirror) +{ +} + +static const struct hmm_mirror_ops +nouveau_svmm = { + .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables, + .release = nouveau_svmm_release, +}; + +void +nouveau_svmm_fini(struct nouveau_svmm **psvmm) +{ + struct nouveau_svmm *svmm = *psvmm; + if (svmm) { + hmm_mirror_unregister(&svmm->mirror); + kfree(*psvmm); + *psvmm = NULL; + } +} + +int +nouveau_svmm_init(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nouveau_svmm *svmm; + struct drm_nouveau_svm_init *args = data; + int ret; + + /* Allocate tracking for SVM-enabled VMM. */ + if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL))) + return -ENOMEM; + svmm->vmm = &cli->svm; + svmm->unmanaged.start = args->unmanaged_addr; + svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size; + mutex_init(&svmm->mutex); + + /* Check that SVM isn't already enabled for the client. */ + mutex_lock(&cli->mutex); + if (cli->svm.cli) { + ret = -EBUSY; + goto done; + } + + /* Allocate a new GPU VMM that can support SVM (managed by the + * client, with replayable faults enabled). + * + * All future channel/memory allocations will make use of this + * VMM instead of the standard one. + */ + ret = nvif_vmm_init(&cli->mmu, cli->vmm.vmm.object.oclass, true, + args->unmanaged_addr, args->unmanaged_size, + &(struct gp100_vmm_v0) { + .fault_replay = true, + }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); + if (ret) + goto done; + + /* Enable HMM mirroring of CPU address-space to VMM. */ + svmm->mm = get_task_mm(current); + down_write(&svmm->mm->mmap_sem); + svmm->mirror.ops = &nouveau_svmm; + ret = hmm_mirror_register(&svmm->mirror, svmm->mm); + if (ret == 0) { + cli->svm.svmm = svmm; + cli->svm.cli = cli; + } + up_write(&svmm->mm->mmap_sem); + mmput(svmm->mm); + +done: + if (ret) + nouveau_svmm_fini(&svmm); + mutex_unlock(&cli->mutex); + return ret; +} + +static const u64 +nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { + [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, + [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, + [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, +}; + +static const u64 +nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = { + [HMM_PFN_ERROR ] = ~NVIF_VMM_PFNMAP_V0_V, + [HMM_PFN_NONE ] = NVIF_VMM_PFNMAP_V0_NONE, + [HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V, +}; + +/* Issue fault replay for GPU to retry accesses that faulted previously. */ +static void +nouveau_svm_fault_replay(struct nouveau_svm *svm) +{ + SVM_DBG(svm, "replay"); + WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, + GP100_VMM_VN_FAULT_REPLAY, + &(struct gp100_vmm_fault_replay_vn) {}, + sizeof(struct gp100_vmm_fault_replay_vn))); +} + +/* Cancel a replayable fault that could not be handled. + * + * Cancelling the fault will trigger recovery to reset the engine + * and kill the offending channel (ie. GPU SIGSEGV). + */ +static void +nouveau_svm_fault_cancel(struct nouveau_svm *svm, + u64 inst, u8 hub, u8 gpc, u8 client) +{ + SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client); + WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, + GP100_VMM_VN_FAULT_CANCEL, + &(struct gp100_vmm_fault_cancel_v0) { + .hub = hub, + .gpc = gpc, + .client = client, + .inst = inst, + }, sizeof(struct gp100_vmm_fault_cancel_v0))); +} + +static void +nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm, + struct nouveau_svm_fault *fault) +{ + nouveau_svm_fault_cancel(svm, fault->inst, + fault->hub, + fault->gpc, + fault->client); +} + +static int +nouveau_svm_fault_cmp(const void *a, const void *b) +{ + const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a; + const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b; + int ret; + if ((ret = (s64)fa->inst - fb->inst)) + return ret; + if ((ret = (s64)fa->addr - fb->addr)) + return ret; + /*XXX: atomic? */ + return (fa->access == 0 || fa->access == 3) - + (fb->access == 0 || fb->access == 3); +} + +static void +nouveau_svm_fault_cache(struct nouveau_svm *svm, + struct nouveau_svm_fault_buffer *buffer, u32 offset) +{ + struct nvif_object *memory = &buffer->object; + const u32 instlo = nvif_rd32(memory, offset + 0x00); + const u32 insthi = nvif_rd32(memory, offset + 0x04); + const u32 addrlo = nvif_rd32(memory, offset + 0x08); + const u32 addrhi = nvif_rd32(memory, offset + 0x0c); + const u32 timelo = nvif_rd32(memory, offset + 0x10); + const u32 timehi = nvif_rd32(memory, offset + 0x14); + const u32 engine = nvif_rd32(memory, offset + 0x18); + const u32 info = nvif_rd32(memory, offset + 0x1c); + const u64 inst = (u64)insthi << 32 | instlo; + const u8 gpc = (info & 0x1f000000) >> 24; + const u8 hub = (info & 0x00100000) >> 20; + const u8 client = (info & 0x00007f00) >> 8; + struct nouveau_svm_fault *fault; + + //XXX: i think we're supposed to spin waiting */ + if (WARN_ON(!(info & 0x80000000))) + return; + + nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000); + + if (!buffer->fault[buffer->fault_nr]) { + fault = kmalloc(sizeof(*fault), GFP_KERNEL); + if (WARN_ON(!fault)) { + nouveau_svm_fault_cancel(svm, inst, hub, gpc, client); + return; + } + buffer->fault[buffer->fault_nr] = fault; + } + + fault = buffer->fault[buffer->fault_nr++]; + fault->inst = inst; + fault->addr = (u64)addrhi << 32 | addrlo; + fault->time = (u64)timehi << 32 | timelo; + fault->engine = engine; + fault->gpc = gpc; + fault->hub = hub; + fault->access = (info & 0x000f0000) >> 16; + fault->client = client; + fault->fault = (info & 0x0000001f); + + SVM_DBG(svm, "fault %016llx %016llx %02x", + fault->inst, fault->addr, fault->access); +} + +static int +nouveau_svm_fault(struct nvif_notify *notify) +{ + struct nouveau_svm_fault_buffer *buffer = + container_of(notify, typeof(*buffer), notify); + struct nouveau_svm *svm = + container_of(buffer, typeof(*svm), buffer[buffer->id]); + struct nvif_object *device = &svm->drm->client.device.object; + struct nouveau_svmm *svmm; + struct { + struct { + struct nvif_ioctl_v0 i; + struct nvif_ioctl_mthd_v0 m; + struct nvif_vmm_pfnmap_v0 p; + } i; + u64 phys[16]; + } args; + struct hmm_range range; + struct vm_area_struct *vma; + u64 inst, start, limit; + int fi, fn, pi, fill; + int replay = 0, ret; + + /* Parse available fault buffer entries into a cache, and update + * the GET pointer so HW can reuse the entries. + */ + SVM_DBG(svm, "fault handler"); + if (buffer->get == buffer->put) { + buffer->put = nvif_rd32(device, buffer->putaddr); + buffer->get = nvif_rd32(device, buffer->getaddr); + if (buffer->get == buffer->put) + return NVIF_NOTIFY_KEEP; + } + buffer->fault_nr = 0; + + SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put); + while (buffer->get != buffer->put) { + nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20); + if (++buffer->get == buffer->entries) + buffer->get = 0; + } + nvif_wr32(device, buffer->getaddr, buffer->get); + SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr); + + /* Sort parsed faults by instance pointer to prevent unnecessary + * instance to SVMM translations, followed by address and access + * type to reduce the amount of work when handling the faults. + */ + sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault), + nouveau_svm_fault_cmp, NULL); + + /* Lookup SVMM structure for each unique instance pointer. */ + mutex_lock(&svm->mutex); + for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) { + if (!svmm || buffer->fault[fi]->inst != inst) { + struct nouveau_ivmm *ivmm = + nouveau_ivmm_find(svm, buffer->fault[fi]->inst); + svmm = ivmm ? ivmm->svmm : NULL; + inst = buffer->fault[fi]->inst; + SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm); + } + buffer->fault[fi]->svmm = svmm; + } + mutex_unlock(&svm->mutex); + + /* Process list of faults. */ + args.i.i.version = 0; + args.i.i.type = NVIF_IOCTL_V0_MTHD; + args.i.m.version = 0; + args.i.m.method = NVIF_VMM_V0_PFNMAP; + args.i.p.version = 0; + + for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { + /* Cancel any faults from non-SVM channels. */ + if (!(svmm = buffer->fault[fi]->svmm)) { + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr); + + /* We try and group handling of faults within a small + * window into a single update. + */ + start = buffer->fault[fi]->addr; + limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT); + if (start < svmm->unmanaged.limit) + limit = min_t(u64, limit, svmm->unmanaged.start); + else + if (limit > svmm->unmanaged.start) + start = max_t(u64, start, svmm->unmanaged.limit); + SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); + + /* Intersect fault window with the CPU VMA, cancelling + * the fault if the address is invalid. + */ + down_read(&svmm->mm->mmap_sem); + vma = find_vma_intersection(svmm->mm, start, limit); + if (!vma) { + SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit); + up_read(&svmm->mm->mmap_sem); + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + start = max_t(u64, start, vma->vm_start); + limit = min_t(u64, limit, vma->vm_end); + SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); + + if (buffer->fault[fi]->addr != start) { + SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr); + up_read(&svmm->mm->mmap_sem); + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + + /* Prepare the GPU-side update of all pages within the + * fault window, determining required pages and access + * permissions based on pending faults. + */ + args.i.p.page = PAGE_SHIFT; + args.i.p.addr = start; + for (fn = fi, pi = 0;;) { + /* Determine required permissions based on GPU fault + * access flags. + *XXX: atomic? + */ + if (buffer->fault[fn]->access != 0 /* READ. */ && + buffer->fault[fn]->access != 3 /* PREFETCH. */) { + args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V | + NVIF_VMM_PFNMAP_V0_W; + } else { + args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V; + } + args.i.p.size = pi << PAGE_SHIFT; + + /* It's okay to skip over duplicate addresses from the + * same SVMM as faults are ordered by access type such + * that only the first one needs to be handled. + * + * ie. WRITE faults appear first, thus any handling of + * pending READ faults will already be satisfied. + */ + while (++fn < buffer->fault_nr && + buffer->fault[fn]->svmm == svmm && + buffer->fault[fn ]->addr == + buffer->fault[fn - 1]->addr); + + /* If the next fault is outside the window, or all GPU + * faults have been dealt with, we're done here. + */ + if (fn >= buffer->fault_nr || + buffer->fault[fn]->svmm != svmm || + buffer->fault[fn]->addr >= limit) + break; + + /* Fill in the gap between this fault and the next. */ + fill = (buffer->fault[fn ]->addr - + buffer->fault[fn - 1]->addr) >> PAGE_SHIFT; + while (--fill) + args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE; + } + + SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)", + args.i.p.addr, + args.i.p.addr + args.i.p.size, fn - fi); + + /* Have HMM fault pages within the fault window to the GPU. */ + range.vma = vma; + range.start = args.i.p.addr; + range.end = args.i.p.addr + args.i.p.size; + range.pfns = args.phys; + range.flags = nouveau_svm_pfn_flags; + range.values = nouveau_svm_pfn_values; + range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; +again: + ret = hmm_vma_fault(&range, true); + if (ret == 0) { + mutex_lock(&svmm->mutex); + if (!hmm_vma_range_done(&range)) { + mutex_unlock(&svmm->mutex); + goto again; + } + + svmm->vmm->vmm.object.client->super = true; + ret = nvif_object_ioctl(&svmm->vmm->vmm.object, + &args, sizeof(args.i) + + pi * sizeof(args.phys[0]), + NULL); + svmm->vmm->vmm.object.client->super = false; + mutex_unlock(&svmm->mutex); + } + up_read(&svmm->mm->mmap_sem); + + /* Cancel any faults in the window whose pages didn't manage + * to keep their valid bit, or stay writeable when required. + * + * If handling failed completely, cancel all faults. + */ + while (fi < fn) { + struct nouveau_svm_fault *fault = buffer->fault[fi++]; + pi = (fault->addr - range.start) >> PAGE_SHIFT; + if (ret || + !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) || + (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) && + fault->access != 0 && fault->access != 3)) { + nouveau_svm_fault_cancel_fault(svm, fault); + continue; + } + replay++; + } + } + + /* Issue fault replay to the GPU. */ + if (replay) + nouveau_svm_fault_replay(svm); + return NVIF_NOTIFY_KEEP; +} + +static void +nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id) +{ + struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; + nvif_notify_put(&buffer->notify); +} + +static int +nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id) +{ + struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; + struct nvif_object *device = &svm->drm->client.device.object; + buffer->get = nvif_rd32(device, buffer->getaddr); + buffer->put = nvif_rd32(device, buffer->putaddr); + SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put); + return nvif_notify_get(&buffer->notify); +} + +static void +nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id) +{ + struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; + int i; + + if (buffer->fault) { + for (i = 0; buffer->fault[i] && i < buffer->entries; i++) + kfree(buffer->fault[i]); + kvfree(buffer->fault); + } + + nouveau_svm_fault_buffer_fini(svm, id); + + nvif_notify_fini(&buffer->notify); + nvif_object_fini(&buffer->object); +} + +static int +nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) +{ + struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; + struct nouveau_drm *drm = svm->drm; + struct nvif_object *device = &drm->client.device.object; + struct nvif_clb069_v0 args = {}; + int ret; + + buffer->id = id; + + ret = nvif_object_init(device, 0, oclass, &args, sizeof(args), + &buffer->object); + if (ret < 0) { + SVM_ERR(svm, "Fault buffer allocation failed: %d", ret); + return ret; + } + + nvif_object_map(&buffer->object, NULL, 0); + buffer->entries = args.entries; + buffer->getaddr = args.get; + buffer->putaddr = args.put; + + ret = nvif_notify_init(&buffer->object, nouveau_svm_fault, true, + NVB069_V0_NTFY_FAULT, NULL, 0, 0, + &buffer->notify); + if (ret) + return ret; + + buffer->fault = kvzalloc(sizeof(*buffer->fault) * buffer->entries, GFP_KERNEL); + if (!buffer->fault) + return -ENOMEM; + + return nouveau_svm_fault_buffer_init(svm, id); +} + +void +nouveau_svm_resume(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) + nouveau_svm_fault_buffer_init(svm, 0); +} + +void +nouveau_svm_suspend(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) + nouveau_svm_fault_buffer_fini(svm, 0); +} + +void +nouveau_svm_fini(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) { + nouveau_svm_fault_buffer_dtor(svm, 0); + kfree(drm->svm); + drm->svm = NULL; + } +} + +void +nouveau_svm_init(struct nouveau_drm *drm) +{ + static const struct nvif_mclass buffers[] = { + { VOLTA_FAULT_BUFFER_A, 0 }, + { MAXWELL_FAULT_BUFFER_A, 0 }, + {} + }; + struct nouveau_svm *svm; + int ret; + + /* Disable on Volta and newer until channel recovery is fixed, + * otherwise clients will have a trivial way to trash the GPU + * for everyone. + */ + if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL) + return; + + if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL))) + return; + + drm->svm->drm = drm; + mutex_init(&drm->svm->mutex); + INIT_LIST_HEAD(&drm->svm->inst); + + ret = nvif_mclass(&drm->client.device.object, buffers); + if (ret < 0) { + SVM_DBG(svm, "No supported fault buffer class"); + nouveau_svm_fini(drm); + return; + } + + ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0); + if (ret) { + nouveau_svm_fini(drm); + return; + } + + SVM_DBG(svm, "Initialised"); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h new file mode 100644 index 000000000000..0379a1c316ca --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_svm.h @@ -0,0 +1,41 @@ +#ifndef __NOUVEAU_SVM_H__ +#define __NOUVEAU_SVM_H__ +#include +struct drm_device; +struct drm_file; +struct nouveau_drm; + +struct nouveau_svmm; + +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) +void nouveau_svm_init(struct nouveau_drm *); +void nouveau_svm_fini(struct nouveau_drm *); +void nouveau_svm_suspend(struct nouveau_drm *); +void nouveau_svm_resume(struct nouveau_drm *); + +int nouveau_svmm_init(struct drm_device *, void *, struct drm_file *); +void nouveau_svmm_fini(struct nouveau_svmm **); +int nouveau_svmm_join(struct nouveau_svmm *, u64 inst); +void nouveau_svmm_part(struct nouveau_svmm *, u64 inst); +#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +static inline void nouveau_svm_init(struct nouveau_drm *drm) {} +static inline void nouveau_svm_fini(struct nouveau_drm *drm) {} +static inline void nouveau_svm_suspend(struct nouveau_drm *drm) {} +static inline void nouveau_svm_resume(struct nouveau_drm *drm) {} + +static inline int nouveau_svmm_init(struct drm_device *device, void *p, + struct drm_file *file) +{ + return -ENOSYS; +} + +static inline void nouveau_svmm_fini(struct nouveau_svmm **svmmp) {} + +static inline int nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) +{ + return 0; +} + +static inline void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) {} +#endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 724d02d7c049..77061182a1cf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -22,6 +22,7 @@ #include "nouveau_vmm.h" #include "nouveau_drv.h" #include "nouveau_bo.h" +#include "nouveau_svm.h" #include "nouveau_mem.h" void @@ -119,6 +120,7 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, void nouveau_vmm_fini(struct nouveau_vmm *vmm) { + nouveau_svmm_fini(&vmm->svmm); nvif_vmm_fini(&vmm->vmm); vmm->cli = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h index ede872f6f668..2b98d975f37e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h @@ -25,6 +25,7 @@ void nouveau_vma_unmap(struct nouveau_vma *); struct nouveau_vmm { struct nouveau_cli *cli; struct nvif_vmm vmm; + struct nouveau_svmm *svmm; }; int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *); diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 259588a4b61b..afac182c80d8 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -133,12 +133,20 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 /* deprecated */ #define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ #define DRM_NOUVEAU_NVIF 0x07 +#define DRM_NOUVEAU_SVM_INIT 0x08 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +struct drm_nouveau_svm_init { + __u64 unmanaged_addr; + __u64 unmanaged_size; +}; + +#define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init) + #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) From 5be73b690875f7eb2d2defb54ccd7f2f12074984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 26 Jul 2018 17:59:13 -0400 Subject: [PATCH 56/60] drm/nouveau/dmem: device memory helpers for SVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device memory can be use in SVM, in which case we do not have any of the existing buffer object. This commit add infrastructure to allow use of device memory without nouveau_bo. Again this is a temporary solution until a rework of GPU memory management. Signed-off-by: Jérôme Glisse --- drivers/gpu/drm/nouveau/Kbuild | 1 + drivers/gpu/drm/nouveau/Kconfig | 1 + drivers/gpu/drm/nouveau/nouveau_dmem.c | 912 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_dmem.h | 60 ++ drivers/gpu/drm/nouveau/nouveau_drm.c | 5 + drivers/gpu/drm/nouveau/nouveau_drv.h | 2 + drivers/gpu/drm/nouveau/nouveau_svm.c | 2 + 7 files changed, 983 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nouveau_dmem.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_dmem.h diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 71bd48aafe64..581404e6544d 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -31,6 +31,7 @@ nouveau-y += nouveau_vga.o nouveau-y += nouveau_bo.o nouveau-y += nouveau_gem.o nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_svm.o +nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_dmem.o nouveau-y += nouveau_mem.o nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index e7b26a6f386f..00cd9ab8948d 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -78,6 +78,7 @@ config DRM_NOUVEAU_SVM depends on DRM_NOUVEAU depends on STAGING select HMM_MIRROR + select DEVICE_PRIVATE default n help Say Y here if you want to enable experimental support for diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c new file mode 100644 index 000000000000..b8ba338df1fb --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -0,0 +1,912 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nouveau_dmem.h" +#include "nouveau_drv.h" +#include "nouveau_chan.h" +#include "nouveau_dma.h" +#include "nouveau_mem.h" +#include "nouveau_bo.h" + +#include +#include +#include +#include + +#include +#include + +/* + * FIXME: this is ugly right now we are using TTM to allocate vram and we pin + * it in vram while in use. We likely want to overhaul memory management for + * nouveau to be more page like (not necessarily with system page size but a + * bigger page size) at lowest level and have some shim layer on top that would + * provide the same functionality as TTM. + */ +#define DMEM_CHUNK_SIZE (2UL << 20) +#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) + +struct nouveau_migrate; + +typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, + u64 dst_addr, u64 src_addr); + +struct nouveau_dmem_chunk { + struct list_head list; + struct nouveau_bo *bo; + struct nouveau_drm *drm; + unsigned long pfn_first; + unsigned long callocated; + unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)]; + struct nvif_vma vma; + spinlock_t lock; +}; + +struct nouveau_dmem_migrate { + nouveau_migrate_copy_t copy_func; + struct nouveau_channel *chan; +}; + +struct nouveau_dmem { + struct hmm_devmem *devmem; + struct nouveau_dmem_migrate migrate; + struct list_head chunk_free; + struct list_head chunk_full; + struct list_head chunk_empty; + struct mutex mutex; +}; + +struct nouveau_migrate_hmem { + struct scatterlist *sg; + struct nouveau_mem mem; + unsigned long npages; + struct nvif_vma vma; +}; + +struct nouveau_dmem_fault { + struct nouveau_drm *drm; + struct nouveau_fence *fence; + struct nouveau_migrate_hmem hmem; +}; + +struct nouveau_migrate { + struct vm_area_struct *vma; + struct nouveau_drm *drm; + struct nouveau_fence *fence; + unsigned long npages; + struct nouveau_migrate_hmem hmem; +}; + +static void +nouveau_migrate_hmem_fini(struct nouveau_drm *drm, + struct nouveau_migrate_hmem *hmem) +{ + struct nvif_vmm *vmm = &drm->client.vmm.vmm; + + nouveau_mem_fini(&hmem->mem); + nvif_vmm_put(vmm, &hmem->vma); + + if (hmem->sg) { + dma_unmap_sg_attrs(drm->dev->dev, hmem->sg, + hmem->npages, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + kfree(hmem->sg); + hmem->sg = NULL; + } +} + +static int +nouveau_migrate_hmem_init(struct nouveau_drm *drm, + struct nouveau_migrate_hmem *hmem, + unsigned long npages, + const unsigned long *pfns) +{ + struct nvif_vmm *vmm = &drm->client.vmm.vmm; + unsigned long i; + int ret; + + hmem->sg = kzalloc(npages * sizeof(*hmem->sg), GFP_KERNEL); + if (hmem->sg == NULL) + return -ENOMEM; + + for (i = 0, hmem->npages = 0; hmem->npages < npages; ++i) { + struct page *page; + + if (!pfns[i] || pfns[i] == MIGRATE_PFN_ERROR) + continue; + + page = migrate_pfn_to_page(pfns[i]); + if (page == NULL) { + ret = -EINVAL; + goto error; + } + + sg_set_page(&hmem->sg[hmem->npages], page, PAGE_SIZE, 0); + hmem->npages++; + } + sg_mark_end(&hmem->sg[hmem->npages - 1]); + + i = dma_map_sg_attrs(drm->dev->dev, hmem->sg, hmem->npages, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + if (i != hmem->npages) { + ret = -ENOMEM; + goto error; + } + + ret = nouveau_mem_sgl(&hmem->mem, &drm->client, + hmem->npages, hmem->sg); + if (ret) + goto error; + + ret = nvif_vmm_get(vmm, LAZY, false, hmem->mem.mem.page, + 0, hmem->mem.mem.size, &hmem->vma); + if (ret) + goto error; + + ret = nouveau_mem_map(&hmem->mem, vmm, &hmem->vma); + if (ret) + goto error; + + return 0; + +error: + nouveau_migrate_hmem_fini(drm, hmem); + return ret; +} + + +static void +nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page) +{ + struct nouveau_dmem_chunk *chunk; + struct nouveau_drm *drm; + unsigned long idx; + + chunk = (void *)hmm_devmem_page_get_drvdata(page); + idx = page_to_pfn(page) - chunk->pfn_first; + drm = chunk->drm; + + /* + * FIXME: + * + * This is really a bad example, we need to overhaul nouveau memory + * management to be more page focus and allow lighter locking scheme + * to be use in the process. + */ + spin_lock(&chunk->lock); + clear_bit(idx, chunk->bitmap); + WARN_ON(!chunk->callocated); + chunk->callocated--; + /* + * FIXME when chunk->callocated reach 0 we should add the chunk to + * a reclaim list so that it can be freed in case of memory pressure. + */ + spin_unlock(&chunk->lock); +} + +static void +nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_dmem_fault *fault = private; + struct nouveau_drm *drm = fault->drm; + unsigned long addr, i, c, npages = 0; + nouveau_migrate_copy_t copy; + int ret; + + + /* First allocate new memory */ + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct page *dpage, *spage; + + dst_pfns[i] = 0; + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + dpage = hmm_vma_alloc_locked_page(vma, addr); + if (!dpage) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + continue; + } + + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED; + npages++; + } + + /* Create scatter list FIXME: get rid of scatter list */ + ret = nouveau_migrate_hmem_init(drm, &fault->hmem, npages, dst_pfns); + if (ret) + goto error; + + /* Copy things over */ + copy = drm->dmem->migrate.copy_func; + for (addr = start, i = c = 0; addr < end; addr += PAGE_SIZE, i++) { + struct nouveau_dmem_chunk *chunk; + struct page *spage, *dpage; + u64 src_addr, dst_addr; + + dpage = migrate_pfn_to_page(dst_pfns[i]); + if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + dst_addr = fault->hmem.vma.addr + (c << PAGE_SHIFT); + c++; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + + chunk = (void *)hmm_devmem_page_get_drvdata(spage); + src_addr = page_to_pfn(spage) - chunk->pfn_first; + src_addr = (src_addr << PAGE_SHIFT) + chunk->vma.addr; + + ret = copy(drm, 1, dst_addr, src_addr); + if (ret) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + } + + nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); + + return; + +error: + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { + struct page *page; + + if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + page = migrate_pfn_to_page(dst_pfns[i]); + dst_pfns[i] = MIGRATE_PFN_ERROR; + if (page == NULL) + continue; + + __free_page(page); + } +} + +void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, + const unsigned long *src_pfns, + const unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_dmem_fault *fault = private; + struct nouveau_drm *drm = fault->drm; + + if (fault->fence) { + nouveau_fence_wait(fault->fence, true, false); + nouveau_fence_unref(&fault->fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object below (nouveau_migrate_hmem_fini()). + */ + } + nouveau_migrate_hmem_fini(drm, &fault->hmem); +} + +static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { + .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, + .finalize_and_map = nouveau_dmem_fault_finalize_and_map, +}; + +static int +nouveau_dmem_fault(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp) +{ + struct drm_device *drm_dev = dev_get_drvdata(devmem->device); + unsigned long src[1] = {0}, dst[1] = {0}; + struct nouveau_dmem_fault fault = {0}; + int ret; + + + + /* + * FIXME what we really want is to find some heuristic to migrate more + * than just one page on CPU fault. When such fault happens it is very + * likely that more surrounding page will CPU fault too. + */ + fault.drm = nouveau_drm(drm_dev); + ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr, + addr + PAGE_SIZE, src, dst, &fault); + if (ret) + return VM_FAULT_SIGBUS; + + if (dst[0] == MIGRATE_PFN_ERROR) + return VM_FAULT_SIGBUS; + + return 0; +} + +static const struct hmm_devmem_ops +nouveau_dmem_devmem_ops = { + .free = nouveau_dmem_free, + .fault = nouveau_dmem_fault, +}; + +static int +nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) +{ + struct nvif_vmm *vmm = &drm->client.vmm.vmm; + struct nouveau_dmem_chunk *chunk; + int ret; + + if (drm->dmem == NULL) + return -EINVAL; + + mutex_lock(&drm->dmem->mutex); + chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, + struct nouveau_dmem_chunk, + list); + if (chunk == NULL) { + mutex_unlock(&drm->dmem->mutex); + return -ENOMEM; + } + + list_del(&chunk->list); + mutex_unlock(&drm->dmem->mutex); + + ret = nvif_vmm_get(vmm, LAZY, false, 12, 0, + DMEM_CHUNK_SIZE, &chunk->vma); + if (ret) + goto out; + + ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, + TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, + &chunk->bo); + if (ret) + goto out; + + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + if (ret) { + nouveau_bo_ref(NULL, &chunk->bo); + goto out; + } + + ret = nouveau_mem_map(nouveau_mem(&chunk->bo->bo.mem), vmm, &chunk->vma); + if (ret) { + nouveau_bo_unpin(chunk->bo); + nouveau_bo_ref(NULL, &chunk->bo); + goto out; + } + + bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); + spin_lock_init(&chunk->lock); + +out: + mutex_lock(&drm->dmem->mutex); + if (chunk->bo) + list_add(&chunk->list, &drm->dmem->chunk_empty); + else + list_add_tail(&chunk->list, &drm->dmem->chunk_empty); + mutex_unlock(&drm->dmem->mutex); + + return ret; +} + +static struct nouveau_dmem_chunk * +nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + + chunk = list_first_entry_or_null(&drm->dmem->chunk_free, + struct nouveau_dmem_chunk, + list); + if (chunk) + return chunk; + + chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, + struct nouveau_dmem_chunk, + list); + if (chunk->bo) + return chunk; + + return NULL; +} + +static int +nouveau_dmem_pages_alloc(struct nouveau_drm *drm, + unsigned long npages, + unsigned long *pages) +{ + struct nouveau_dmem_chunk *chunk; + unsigned long c; + int ret; + + memset(pages, 0xff, npages * sizeof(*pages)); + + mutex_lock(&drm->dmem->mutex); + for (c = 0; c < npages;) { + unsigned long i; + + chunk = nouveau_dmem_chunk_first_free_locked(drm); + if (chunk == NULL) { + mutex_unlock(&drm->dmem->mutex); + ret = nouveau_dmem_chunk_alloc(drm); + if (ret) { + if (c) + break; + return ret; + } + continue; + } + + spin_lock(&chunk->lock); + i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES); + while (i < DMEM_CHUNK_NPAGES && c < npages) { + pages[c] = chunk->pfn_first + i; + set_bit(i, chunk->bitmap); + chunk->callocated++; + c++; + + i = find_next_zero_bit(chunk->bitmap, + DMEM_CHUNK_NPAGES, i); + } + spin_unlock(&chunk->lock); + } + mutex_unlock(&drm->dmem->mutex); + + return 0; +} + +static struct page * +nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) +{ + unsigned long pfns[1]; + struct page *page; + int ret; + + /* FIXME stop all the miss-match API ... */ + ret = nouveau_dmem_pages_alloc(drm, 1, pfns); + if (ret) + return NULL; + + page = pfn_to_page(pfns[0]); + get_page(page); + lock_page(page); + return page; +} + +static void +nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page) +{ + unlock_page(page); + put_page(page); +} + +void +nouveau_dmem_resume(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + int ret; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + mutex_unlock(&drm->dmem->mutex); +} + +void +nouveau_dmem_suspend(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { + nouveau_bo_unpin(chunk->bo); + } + list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { + nouveau_bo_unpin(chunk->bo); + } + list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { + nouveau_bo_unpin(chunk->bo); + } + mutex_unlock(&drm->dmem->mutex); +} + +void +nouveau_dmem_fini(struct nouveau_drm *drm) +{ + struct nvif_vmm *vmm = &drm->client.vmm.vmm; + struct nouveau_dmem_chunk *chunk, *tmp; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + + WARN_ON(!list_empty(&drm->dmem->chunk_free)); + WARN_ON(!list_empty(&drm->dmem->chunk_full)); + + list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) { + if (chunk->bo) { + nouveau_bo_unpin(chunk->bo); + nouveau_bo_ref(NULL, &chunk->bo); + } + nvif_vmm_put(vmm, &chunk->vma); + list_del(&chunk->list); + kfree(chunk); + } + + mutex_unlock(&drm->dmem->mutex); +} + +static int +nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, + u64 dst_addr, u64 src_addr) +{ + struct nouveau_channel *chan = drm->dmem->migrate.chan; + int ret; + + ret = RING_SPACE(chan, 10); + if (ret) + return ret; + + BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); + OUT_RING (chan, upper_32_bits(src_addr)); + OUT_RING (chan, lower_32_bits(src_addr)); + OUT_RING (chan, upper_32_bits(dst_addr)); + OUT_RING (chan, lower_32_bits(dst_addr)); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, npages); + BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386); + return 0; +} + +static int +nouveau_dmem_migrate_init(struct nouveau_drm *drm) +{ + switch (drm->ttm.copy.oclass) { + case PASCAL_DMA_COPY_A: + case PASCAL_DMA_COPY_B: + case VOLTA_DMA_COPY_A: + case TURING_DMA_COPY_A: + drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; + drm->dmem->migrate.chan = drm->ttm.chan; + return 0; + default: + break; + } + return -ENODEV; +} + +void +nouveau_dmem_init(struct nouveau_drm *drm) +{ + struct device *device = drm->dev->dev; + unsigned long i, size; + int ret; + + /* This only make sense on PASCAL or newer */ + if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL) + return; + + if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL))) + return; + + mutex_init(&drm->dmem->mutex); + INIT_LIST_HEAD(&drm->dmem->chunk_free); + INIT_LIST_HEAD(&drm->dmem->chunk_full); + INIT_LIST_HEAD(&drm->dmem->chunk_empty); + + size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE); + + /* Initialize migration dma helpers before registering memory */ + ret = nouveau_dmem_migrate_init(drm); + if (ret) { + kfree(drm->dmem); + drm->dmem = NULL; + return; + } + + /* + * FIXME we need some kind of policy to decide how much VRAM we + * want to register with HMM. For now just register everything + * and latter if we want to do thing like over commit then we + * could revisit this. + */ + drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops, + device, size); + if (drm->dmem->devmem == NULL) { + kfree(drm->dmem); + drm->dmem = NULL; + return; + } + + for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) { + struct nouveau_dmem_chunk *chunk; + struct page *page; + unsigned long j; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (chunk == NULL) { + nouveau_dmem_fini(drm); + return; + } + + chunk->drm = drm; + chunk->pfn_first = drm->dmem->devmem->pfn_first; + chunk->pfn_first += (i * DMEM_CHUNK_NPAGES); + list_add_tail(&chunk->list, &drm->dmem->chunk_empty); + + page = pfn_to_page(chunk->pfn_first); + for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) { + hmm_devmem_page_set_drvdata(page, (long)chunk); + } + } + + NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); +} + +static void +nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_migrate *migrate = private; + struct nouveau_drm *drm = migrate->drm; + unsigned long addr, i, c, npages = 0; + nouveau_migrate_copy_t copy; + int ret; + + /* First allocate new memory */ + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct page *dpage, *spage; + + dst_pfns[i] = 0; + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + continue; + + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED | + MIGRATE_PFN_DEVICE; + npages++; + } + + if (!npages) + return; + + /* Create scatter list FIXME: get rid of scatter list */ + ret = nouveau_migrate_hmem_init(drm, &migrate->hmem, npages, src_pfns); + if (ret) + goto error; + + /* Copy things over */ + copy = drm->dmem->migrate.copy_func; + for (addr = start, i = c = 0; addr < end; addr += PAGE_SIZE, i++) { + struct nouveau_dmem_chunk *chunk; + struct page *spage, *dpage; + u64 src_addr, dst_addr; + + dpage = migrate_pfn_to_page(dst_pfns[i]); + if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + chunk = (void *)hmm_devmem_page_get_drvdata(dpage); + dst_addr = page_to_pfn(dpage) - chunk->pfn_first; + dst_addr = (dst_addr << PAGE_SHIFT) + chunk->vma.addr; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + + src_addr = migrate->hmem.vma.addr + (c << PAGE_SHIFT); + c++; + + ret = copy(drm, 1, dst_addr, src_addr); + if (ret) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + } + + nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); + + return; + +error: + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { + struct page *page; + + if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + page = migrate_pfn_to_page(dst_pfns[i]); + dst_pfns[i] = MIGRATE_PFN_ERROR; + if (page == NULL) + continue; + + __free_page(page); + } +} + +void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, + const unsigned long *src_pfns, + const unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_migrate *migrate = private; + struct nouveau_drm *drm = migrate->drm; + + if (migrate->fence) { + nouveau_fence_wait(migrate->fence, true, false); + nouveau_fence_unref(&migrate->fence); + } else { + /* + * FIXME wait for channel to be IDLE before finalizing + * the hmem object below (nouveau_migrate_hmem_fini()) ? + */ + } + nouveau_migrate_hmem_fini(drm, &migrate->hmem); + + /* + * FIXME optimization: update GPU page table to point to newly + * migrated memory. + */ +} + +static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { + .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, + .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, +}; + +int +nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + unsigned long *src_pfns, *dst_pfns, npages; + struct nouveau_migrate migrate = {0}; + unsigned long i, c, max; + int ret = 0; + + npages = (end - start) >> PAGE_SHIFT; + max = min(SG_MAX_SINGLE_ALLOC, npages); + src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (src_pfns == NULL) + return -ENOMEM; + dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (dst_pfns == NULL) { + kfree(src_pfns); + return -ENOMEM; + } + + migrate.drm = drm; + migrate.vma = vma; + migrate.npages = npages; + for (i = 0; i < npages; i += c) { + unsigned long next; + + c = min(SG_MAX_SINGLE_ALLOC, npages); + next = start + (c << PAGE_SHIFT); + ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, + next, src_pfns, dst_pfns, &migrate); + if (ret) + goto out; + start = next; + } + +out: + kfree(dst_pfns); + kfree(src_pfns); + return ret; +} + +static inline bool +nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) +{ + if (!is_device_private_page(page)) + return false; + + if (drm->dmem->devmem != page->pgmap->data) + return false; + + return true; +} + +void +nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range) +{ + unsigned long i, npages; + + npages = (range->end - range->start) >> PAGE_SHIFT; + for (i = 0; i < npages; ++i) { + struct nouveau_dmem_chunk *chunk; + struct page *page; + uint64_t addr; + + page = hmm_pfn_to_page(range, range->pfns[i]); + if (page == NULL) + continue; + + if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { + continue; + } + + if (!nouveau_dmem_page(drm, page)) { + WARN(1, "Some unknown device memory !\n"); + range->pfns[i] = 0; + continue; + } + + chunk = (void *)hmm_devmem_page_get_drvdata(page); + addr = page_to_pfn(page) - chunk->pfn_first; + addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; + + range->pfns[i] &= ((1UL << range->pfn_shift) - 1); + range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; + } +} diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h new file mode 100644 index 000000000000..9d97d756fb7d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -0,0 +1,60 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __NOUVEAU_DMEM_H__ +#define __NOUVEAU_DMEM_H__ +#include +struct drm_device; +struct drm_file; +struct nouveau_drm; +struct hmm_range; + +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) +void nouveau_dmem_init(struct nouveau_drm *); +void nouveau_dmem_fini(struct nouveau_drm *); +void nouveau_dmem_suspend(struct nouveau_drm *); +void nouveau_dmem_resume(struct nouveau_drm *); + +int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end); + +void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range); +#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {} + +static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + return 0; +} + +static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range) {} +#endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 53fdfa283ee2..ee2c4d498d7d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -63,6 +63,7 @@ #include "nouveau_connector.h" #include "nouveau_platform.h" #include "nouveau_svm.h" +#include "nouveau_dmem.h" MODULE_PARM_DESC(config, "option string to pass to driver core"); static char *nouveau_config; @@ -551,6 +552,7 @@ nouveau_drm_device_init(struct drm_device *dev) nouveau_debugfs_init(drm); nouveau_hwmon_init(dev); nouveau_svm_init(drm); + nouveau_dmem_init(drm); nouveau_fbcon_init(dev); nouveau_led_init(dev); @@ -594,6 +596,7 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_led_fini(dev); nouveau_fbcon_fini(dev); + nouveau_dmem_fini(drm); nouveau_svm_fini(drm); nouveau_hwmon_fini(dev); nouveau_debugfs_fini(drm); @@ -741,6 +744,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime) int ret; nouveau_svm_suspend(drm); + nouveau_dmem_suspend(drm); nouveau_led_suspend(dev); if (dev->mode_config.num_crtc) { @@ -817,6 +821,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime) } nouveau_led_resume(dev); + nouveau_dmem_resume(drm); nouveau_svm_resume(drm); return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 1326b42f75e6..da847244479d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -212,6 +212,8 @@ struct nouveau_drm { struct dev_pm_domain vga_pm_domain; struct nouveau_svm *svm; + + struct nouveau_dmem *dmem; }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 6158e99b1dc3..3ba980d80a1a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -561,6 +561,8 @@ nouveau_svm_fault(struct nvif_notify *notify) goto again; } + nouveau_dmem_convert_pfn(svm->drm, &range); + svmm->vmm->vmm.object.client->super = true; ret = nvif_object_ioctl(&svmm->vmm->vmm.object, &args, sizeof(args.i) + From f180bf12ac061f093abb9247505f661817973cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 7 Aug 2018 16:13:16 -0400 Subject: [PATCH 57/60] drm/nouveau/svm: new ioctl to migrate process memory to GPU memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This add an ioctl to migrate a range of process address space to the device memory. On platform without cache coherent bus (x86, ARM, ...) this means that CPU can not access that range directly, instead CPU will fault which will migrate the memory back to system memory. This is behind a staging flag so that we can evolve the API. Signed-off-by: Jérôme Glisse --- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/nouveau/nouveau_svm.c | 96 +++++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_svm.h | 7 ++ include/uapi/drm/nouveau_drm.h | 43 ++++++++++++ 4 files changed, 147 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index ee2c4d498d7d..5020265bfbd9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1043,6 +1043,7 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 3ba980d80a1a..93ed43c413f0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -22,6 +22,7 @@ #include "nouveau_svm.h" #include "nouveau_drv.h" #include "nouveau_chan.h" +#include "nouveau_dmem.h" #include #include @@ -104,6 +105,101 @@ struct nouveau_svmm { #define SVMM_ERR(s,f,a...) \ NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) +int +nouveau_svmm_bind(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_nouveau_svm_bind *args = data; + unsigned target, cmd, priority; + unsigned long addr, end, size; + struct mm_struct *mm; + + args->va_start &= PAGE_MASK; + args->va_end &= PAGE_MASK; + + /* Sanity check arguments */ + if (args->reserved0 || args->reserved1) + return -EINVAL; + if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK)) + return -EINVAL; + if (args->va_start >= args->va_end) + return -EINVAL; + if (!args->npages) + return -EINVAL; + + cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; + cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; + switch (cmd) { + case NOUVEAU_SVM_BIND_COMMAND__MIGRATE: + break; + default: + return -EINVAL; + } + + priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT; + priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK; + + /* FIXME support CPU target ie all target value < GPU_VRAM */ + target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT; + target &= NOUVEAU_SVM_BIND_TARGET_MASK; + switch (target) { + case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM: + break; + default: + return -EINVAL; + } + + /* + * FIXME: For now refuse non 0 stride, we need to change the migrate + * kernel function to handle stride to avoid to create a mess within + * each device driver. + */ + if (args->stride) + return -EINVAL; + + size = ((unsigned long)args->npages) << PAGE_SHIFT; + if ((args->va_start + size) <= args->va_start) + return -EINVAL; + if ((args->va_start + size) > args->va_end) + return -EINVAL; + + /* + * Ok we are ask to do something sane, for now we only support migrate + * commands but we will add things like memory policy (what to do on + * page fault) and maybe some other commands. + */ + + mm = get_task_mm(current); + down_read(&mm->mmap_sem); + + for (addr = args->va_start, end = args->va_start + size; addr < end;) { + struct vm_area_struct *vma; + unsigned long next; + + vma = find_vma_intersection(mm, addr, end); + if (!vma) + break; + + next = min(vma->vm_end, end); + /* This is a best effort so we ignore errors */ + nouveau_dmem_migrate_vma(cli->drm, vma, addr, next); + addr = next; + } + + /* + * FIXME Return the number of page we have migrated, again we need to + * update the migrate API to return that information so that we can + * report it to user space. + */ + args->result = 0; + + up_read(&mm->mmap_sem); + mmput(mm); + + return 0; +} + /* Unlink channel instance from SVMM. */ void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h index 0379a1c316ca..e839d8189461 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.h +++ b/drivers/gpu/drm/nouveau/nouveau_svm.h @@ -17,6 +17,7 @@ int nouveau_svmm_init(struct drm_device *, void *, struct drm_file *); void nouveau_svmm_fini(struct nouveau_svmm **); int nouveau_svmm_join(struct nouveau_svmm *, u64 inst); void nouveau_svmm_part(struct nouveau_svmm *, u64 inst); +int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *); #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ static inline void nouveau_svm_init(struct nouveau_drm *drm) {} static inline void nouveau_svm_fini(struct nouveau_drm *drm) {} @@ -37,5 +38,11 @@ static inline int nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) } static inline void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) {} + +static inline int nouveau_svmm_bind(struct drm_device *device, void *p, + struct drm_file *file) +{ + return -ENOSYS; +} #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ #endif diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index afac182c80d8..9459a6e3bc1f 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -134,6 +134,7 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ #define DRM_NOUVEAU_NVIF 0x07 #define DRM_NOUVEAU_SVM_INIT 0x08 +#define DRM_NOUVEAU_SVM_BIND 0x09 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 @@ -145,7 +146,49 @@ struct drm_nouveau_svm_init { __u64 unmanaged_size; }; +struct drm_nouveau_svm_bind { + __u64 header; + __u64 va_start; + __u64 va_end; + __u64 npages; + __u64 stride; + __u64 result; + __u64 reserved0; + __u64 reserved1; +}; + +#define NOUVEAU_SVM_BIND_COMMAND_SHIFT 0 +#define NOUVEAU_SVM_BIND_COMMAND_BITS 8 +#define NOUVEAU_SVM_BIND_COMMAND_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_PRIORITY_SHIFT 8 +#define NOUVEAU_SVM_BIND_PRIORITY_BITS 8 +#define NOUVEAU_SVM_BIND_PRIORITY_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_TARGET_SHIFT 16 +#define NOUVEAU_SVM_BIND_TARGET_BITS 32 +#define NOUVEAU_SVM_BIND_TARGET_MASK 0xffffffff + +/* + * Below is use to validate ioctl argument, userspace can also use it to make + * sure that no bit are set beyond known fields for a given kernel version. + */ +#define NOUVEAU_SVM_BIND_VALID_BITS 48 +#define NOUVEAU_SVM_BIND_VALID_MASK ((1ULL << NOUVEAU_SVM_BIND_VALID_BITS) - 1) + + +/* + * NOUVEAU_BIND_COMMAND__MIGRATE: synchronous migrate to target memory. + * result: number of page successfuly migrate to the target memory. + */ +#define NOUVEAU_SVM_BIND_COMMAND__MIGRATE 0 + +/* + * NOUVEAU_SVM_BIND_HEADER_TARGET__GPU_VRAM: target the GPU VRAM memory. + */ +#define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM (1UL << 31) + + #define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init) +#define DRM_IOCTL_NOUVEAU_SVM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind) #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) From 6c762d1b18d77ea52dab08c94ec219c3a64854e1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 15 Feb 2019 10:35:05 +1000 Subject: [PATCH 58/60] drm/nouveau/dmem: extend copy function to allow direct use of physical addresses Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 56 +++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index b8ba338df1fb..832d078beea2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -46,8 +46,15 @@ struct nouveau_migrate; +enum nouveau_aper { + NOUVEAU_APER_VIRT, + NOUVEAU_APER_VRAM, + NOUVEAU_APER_HOST, +}; + typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, - u64 dst_addr, u64 src_addr); + enum nouveau_aper, u64 dst_addr, + enum nouveau_aper, u64 src_addr); struct nouveau_dmem_chunk { struct list_head list; @@ -267,7 +274,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, src_addr = page_to_pfn(spage) - chunk->pfn_first; src_addr = (src_addr << PAGE_SHIFT) + chunk->vma.addr; - ret = copy(drm, 1, dst_addr, src_addr); + ret = copy(drm, 1, NOUVEAU_APER_VIRT, dst_addr, + NOUVEAU_APER_VIRT, src_addr); if (ret) { dst_pfns[i] = MIGRATE_PFN_ERROR; __free_page(dpage); @@ -588,15 +596,49 @@ nouveau_dmem_fini(struct nouveau_drm *drm) static int nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, - u64 dst_addr, u64 src_addr) + enum nouveau_aper dst_aper, u64 dst_addr, + enum nouveau_aper src_aper, u64 src_addr) { struct nouveau_channel *chan = drm->dmem->migrate.chan; + u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ | + (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ | + (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ | + (1 << 2) /* FLUSH_ENABLE_TRUE. */ | + (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */; int ret; - ret = RING_SPACE(chan, 10); + ret = RING_SPACE(chan, 13); if (ret) return ret; + if (src_aper != NOUVEAU_APER_VIRT) { + switch (src_aper) { + case NOUVEAU_APER_VRAM: + BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0); + break; + case NOUVEAU_APER_HOST: + BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1); + break; + default: + return -EINVAL; + } + launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */ + } + + if (dst_aper != NOUVEAU_APER_VIRT) { + switch (dst_aper) { + case NOUVEAU_APER_VRAM: + BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0); + break; + case NOUVEAU_APER_HOST: + BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1); + break; + default: + return -EINVAL; + } + launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */ + } + BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); OUT_RING (chan, upper_32_bits(src_addr)); OUT_RING (chan, lower_32_bits(src_addr)); @@ -606,7 +648,8 @@ nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, PAGE_SIZE); OUT_RING (chan, npages); - BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386); + BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1); + OUT_RING (chan, launch_dma); return 0; } @@ -761,7 +804,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, src_addr = migrate->hmem.vma.addr + (c << PAGE_SHIFT); c++; - ret = copy(drm, 1, dst_addr, src_addr); + ret = copy(drm, 1, NOUVEAU_APER_VIRT, dst_addr, + NOUVEAU_APER_VIRT, src_addr); if (ret) { nouveau_dmem_page_free_locked(drm, dpage); dst_pfns[i] = 0; From fd5e985643616a6776f81050422b061c3ff1b8d0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 15 Feb 2019 14:45:57 +1000 Subject: [PATCH 59/60] drm/nouveau/dmem: use physical vram addresses during migration copies Removes the need for temporary VMM mappings. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 832d078beea2..e92f896a1d5d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -63,7 +63,6 @@ struct nouveau_dmem_chunk { unsigned long pfn_first; unsigned long callocated; unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)]; - struct nvif_vma vma; spinlock_t lock; }; @@ -272,10 +271,10 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, chunk = (void *)hmm_devmem_page_get_drvdata(spage); src_addr = page_to_pfn(spage) - chunk->pfn_first; - src_addr = (src_addr << PAGE_SHIFT) + chunk->vma.addr; + src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; ret = copy(drm, 1, NOUVEAU_APER_VIRT, dst_addr, - NOUVEAU_APER_VIRT, src_addr); + NOUVEAU_APER_VRAM, src_addr); if (ret) { dst_pfns[i] = MIGRATE_PFN_ERROR; __free_page(dpage); @@ -371,7 +370,6 @@ nouveau_dmem_devmem_ops = { static int nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) { - struct nvif_vmm *vmm = &drm->client.vmm.vmm; struct nouveau_dmem_chunk *chunk; int ret; @@ -390,11 +388,6 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) list_del(&chunk->list); mutex_unlock(&drm->dmem->mutex); - ret = nvif_vmm_get(vmm, LAZY, false, 12, 0, - DMEM_CHUNK_SIZE, &chunk->vma); - if (ret) - goto out; - ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, &chunk->bo); @@ -407,13 +400,6 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) goto out; } - ret = nouveau_mem_map(nouveau_mem(&chunk->bo->bo.mem), vmm, &chunk->vma); - if (ret) { - nouveau_bo_unpin(chunk->bo); - nouveau_bo_ref(NULL, &chunk->bo); - goto out; - } - bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); spin_lock_init(&chunk->lock); @@ -570,7 +556,6 @@ nouveau_dmem_suspend(struct nouveau_drm *drm) void nouveau_dmem_fini(struct nouveau_drm *drm) { - struct nvif_vmm *vmm = &drm->client.vmm.vmm; struct nouveau_dmem_chunk *chunk, *tmp; if (drm->dmem == NULL) @@ -586,7 +571,6 @@ nouveau_dmem_fini(struct nouveau_drm *drm) nouveau_bo_unpin(chunk->bo); nouveau_bo_ref(NULL, &chunk->bo); } - nvif_vmm_put(vmm, &chunk->vma); list_del(&chunk->list); kfree(chunk); } @@ -792,7 +776,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, chunk = (void *)hmm_devmem_page_get_drvdata(dpage); dst_addr = page_to_pfn(dpage) - chunk->pfn_first; - dst_addr = (dst_addr << PAGE_SHIFT) + chunk->vma.addr; + dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; spage = migrate_pfn_to_page(src_pfns[i]); if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { @@ -804,7 +788,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, src_addr = migrate->hmem.vma.addr + (c << PAGE_SHIFT); c++; - ret = copy(drm, 1, NOUVEAU_APER_VIRT, dst_addr, + ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, NOUVEAU_APER_VIRT, src_addr); if (ret) { nouveau_dmem_page_free_locked(drm, dpage); From a788ade4f6e0302710f89b2a3534346df752072d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 15 Feb 2019 15:50:16 +1000 Subject: [PATCH 60/60] drm/nouveau/dmem: use dma addresses during migration copies Removes the need for temporary VMM mappings. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 159 +++++++++---------------- 1 file changed, 53 insertions(+), 106 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index e92f896a1d5d..8be7a83ced9b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -80,17 +80,11 @@ struct nouveau_dmem { struct mutex mutex; }; -struct nouveau_migrate_hmem { - struct scatterlist *sg; - struct nouveau_mem mem; - unsigned long npages; - struct nvif_vma vma; -}; - struct nouveau_dmem_fault { struct nouveau_drm *drm; struct nouveau_fence *fence; - struct nouveau_migrate_hmem hmem; + dma_addr_t *dma; + unsigned long npages; }; struct nouveau_migrate { @@ -98,87 +92,10 @@ struct nouveau_migrate { struct nouveau_drm *drm; struct nouveau_fence *fence; unsigned long npages; - struct nouveau_migrate_hmem hmem; + dma_addr_t *dma; + unsigned long dma_nr; }; -static void -nouveau_migrate_hmem_fini(struct nouveau_drm *drm, - struct nouveau_migrate_hmem *hmem) -{ - struct nvif_vmm *vmm = &drm->client.vmm.vmm; - - nouveau_mem_fini(&hmem->mem); - nvif_vmm_put(vmm, &hmem->vma); - - if (hmem->sg) { - dma_unmap_sg_attrs(drm->dev->dev, hmem->sg, - hmem->npages, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - kfree(hmem->sg); - hmem->sg = NULL; - } -} - -static int -nouveau_migrate_hmem_init(struct nouveau_drm *drm, - struct nouveau_migrate_hmem *hmem, - unsigned long npages, - const unsigned long *pfns) -{ - struct nvif_vmm *vmm = &drm->client.vmm.vmm; - unsigned long i; - int ret; - - hmem->sg = kzalloc(npages * sizeof(*hmem->sg), GFP_KERNEL); - if (hmem->sg == NULL) - return -ENOMEM; - - for (i = 0, hmem->npages = 0; hmem->npages < npages; ++i) { - struct page *page; - - if (!pfns[i] || pfns[i] == MIGRATE_PFN_ERROR) - continue; - - page = migrate_pfn_to_page(pfns[i]); - if (page == NULL) { - ret = -EINVAL; - goto error; - } - - sg_set_page(&hmem->sg[hmem->npages], page, PAGE_SIZE, 0); - hmem->npages++; - } - sg_mark_end(&hmem->sg[hmem->npages - 1]); - - i = dma_map_sg_attrs(drm->dev->dev, hmem->sg, hmem->npages, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); - if (i != hmem->npages) { - ret = -ENOMEM; - goto error; - } - - ret = nouveau_mem_sgl(&hmem->mem, &drm->client, - hmem->npages, hmem->sg); - if (ret) - goto error; - - ret = nvif_vmm_get(vmm, LAZY, false, hmem->mem.mem.page, - 0, hmem->mem.mem.size, &hmem->vma); - if (ret) - goto error; - - ret = nouveau_mem_map(&hmem->mem, vmm, &hmem->vma); - if (ret) - goto error; - - return 0; - -error: - nouveau_migrate_hmem_fini(drm, hmem); - return ret; -} - - static void nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page) { @@ -218,7 +135,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, { struct nouveau_dmem_fault *fault = private; struct nouveau_drm *drm = fault->drm; - unsigned long addr, i, c, npages = 0; + struct device *dev = drm->dev->dev; + unsigned long addr, i, npages = 0; nouveau_migrate_copy_t copy; int ret; @@ -243,14 +161,14 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, npages++; } - /* Create scatter list FIXME: get rid of scatter list */ - ret = nouveau_migrate_hmem_init(drm, &fault->hmem, npages, dst_pfns); - if (ret) + /* Allocate storage for DMA addresses, so we can unmap later. */ + fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); + if (!fault->dma) goto error; /* Copy things over */ copy = drm->dmem->migrate.copy_func; - for (addr = start, i = c = 0; addr < end; addr += PAGE_SIZE, i++) { + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { struct nouveau_dmem_chunk *chunk; struct page *spage, *dpage; u64 src_addr, dst_addr; @@ -259,9 +177,6 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) continue; - dst_addr = fault->hmem.vma.addr + (c << PAGE_SHIFT); - c++; - spage = migrate_pfn_to_page(src_pfns[i]); if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { dst_pfns[i] = MIGRATE_PFN_ERROR; @@ -269,11 +184,23 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, continue; } + fault->dma[fault->npages] = + dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, fault->dma[fault->npages])) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + + dst_addr = fault->dma[fault->npages++]; + chunk = (void *)hmm_devmem_page_get_drvdata(spage); src_addr = page_to_pfn(spage) - chunk->pfn_first; src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - ret = copy(drm, 1, NOUVEAU_APER_VIRT, dst_addr, + ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, NOUVEAU_APER_VRAM, src_addr); if (ret) { dst_pfns[i] = MIGRATE_PFN_ERROR; @@ -321,7 +248,12 @@ void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, * the hmem object below (nouveau_migrate_hmem_fini()). */ } - nouveau_migrate_hmem_fini(drm, &fault->hmem); + + while (fault->npages--) { + dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + kfree(fault->dma); } static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { @@ -732,7 +664,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, { struct nouveau_migrate *migrate = private; struct nouveau_drm *drm = migrate->drm; - unsigned long addr, i, c, npages = 0; + struct device *dev = drm->dev->dev; + unsigned long addr, i, npages = 0; nouveau_migrate_copy_t copy; int ret; @@ -758,14 +691,14 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, if (!npages) return; - /* Create scatter list FIXME: get rid of scatter list */ - ret = nouveau_migrate_hmem_init(drm, &migrate->hmem, npages, src_pfns); - if (ret) + /* Allocate storage for DMA addresses, so we can unmap later. */ + migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); + if (!migrate->dma) goto error; /* Copy things over */ copy = drm->dmem->migrate.copy_func; - for (addr = start, i = c = 0; addr < end; addr += PAGE_SIZE, i++) { + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { struct nouveau_dmem_chunk *chunk; struct page *spage, *dpage; u64 src_addr, dst_addr; @@ -785,11 +718,20 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, continue; } - src_addr = migrate->hmem.vma.addr + (c << PAGE_SHIFT); - c++; + migrate->dma[migrate->dma_nr] = + dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + + src_addr = migrate->dma[migrate->dma_nr++]; ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, - NOUVEAU_APER_VIRT, src_addr); + NOUVEAU_APER_HOST, src_addr); if (ret) { nouveau_dmem_page_free_locked(drm, dpage); dst_pfns[i] = 0; @@ -836,7 +778,12 @@ void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, * the hmem object below (nouveau_migrate_hmem_fini()) ? */ } - nouveau_migrate_hmem_fini(drm, &migrate->hmem); + + while (migrate->dma_nr--) { + dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + kfree(migrate->dma); /* * FIXME optimization: update GPU page table to point to newly