From 90568a2ce58c03f457bdd4fab6675cd327ed13fd Mon Sep 17 00:00:00 2001 From: David Nieto Date: Tue, 18 Apr 2017 21:56:09 -0700 Subject: gpu: nvgpu: allow bind to be interrupted This change solves two problems: (*) the possibility of a crash due to interrupting the GPU initialization following a bind (*) an IOVA memory leak that could prevent the GPU from binding after about 200 bind/unbind cycles A detailed list of fixes: - check that arbiter is initialized before freeing it. - do not re-enable interrupts when MSI is enabled on unbind. - free the semaphore sea on unbind. - ensure we don't double load the vbios. - check return value of nvgpu_mutex_init for semaphores. - add corresponding nvgpu_mutex_destroy calls. bug 1816516 Change-Id: Ia8af73019e0e1183998855d55bb3eea09672a8b7 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1465302 Reviewed-by: Alex Waterman Reviewed-by: Thomas Fleury Reviewed-by: David Jarrett Reviewed-on: https://git-master.nvidia.com/r/1563019 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/clk/clk_arb.c | 4 ++-- drivers/gpu/nvgpu/common/semaphore.c | 3 ++- drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 2 +- drivers/gpu/nvgpu/gp106/bios_gp106.c | 28 ++++++++++++++++++++++------ drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 2 +- 6 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 4f09da74..0df9545a 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -490,9 +490,9 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); } nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + g->clk_arb = NULL; } - nvgpu_kfree(g, g->clk_arb); - g->clk_arb = NULL; } static int nvgpu_clk_arb_install_fd(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/semaphore.c 
b/drivers/gpu/nvgpu/common/semaphore.c index 5496f5ec..0d512e8f 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -66,11 +66,12 @@ out: return ret; } -void gk20a_semaphore_sea_destroy(struct gk20a *g) +void nvgpu_semaphore_sea_destroy(struct gk20a *g) { if (!g->sema_sea) return; + nvgpu_dma_free(g, &g->sema_sea->sea_mem); nvgpu_mutex_destroy(&g->sema_sea->sea_lock); nvgpu_kfree(g, g->sema_sea); g->sema_sea = NULL; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 13c62691..0cd77d1e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -1233,6 +1233,7 @@ struct gk20a { #ifdef CONFIG_DEBUG_FS struct debugfs_blob_wrapper bios_blob; #endif + bool bios_is_init; struct nvgpu_clk_arb *clk_arb; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 0e0326dd..dd8b900d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -393,7 +393,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) nvgpu_vm_put(mm->pmu.vm); nvgpu_vm_put(mm->cde.vm); - gk20a_semaphore_sea_destroy(g); + nvgpu_semaphore_sea_destroy(g); gk20a_vidmem_destroy(g); nvgpu_pd_cache_fini(g); } diff --git a/drivers/gpu/nvgpu/gp106/bios_gp106.c b/drivers/gpu/nvgpu/gp106/bios_gp106.c index f772e267..c728433f 100644 --- a/drivers/gpu/nvgpu/gp106/bios_gp106.c +++ b/drivers/gpu/nvgpu/gp106/bios_gp106.c @@ -177,6 +177,9 @@ int gp106_bios_init(struct gk20a *g) gk20a_dbg_fn(""); + if (g->bios_is_init) + return 0; + gk20a_dbg_info("reading bios from EEPROM"); g->bios.size = BIOS_SIZE; g->bios.data = nvgpu_vmalloc(g, BIOS_SIZE); @@ -195,12 +198,13 @@ int gp106_bios_init(struct gk20a *g) err = nvgpu_bios_parse_rom(g); if (err) - return err; + goto free_firmware; if (g->gpu_characteristics.vbios_version < g->vbios_min_version) { nvgpu_err(g, "unsupported VBIOS version %08x", g->gpu_characteristics.vbios_version); - return -EINVAL; + 
err = -EINVAL; + goto free_firmware; } /* WAR for HW2.5 RevA (INA3221 is missing) */ @@ -216,25 +220,37 @@ int gp106_bios_init(struct gk20a *g) d = debugfs_create_blob("bios", S_IRUGO, l->debugfs, &g->bios_blob); - if (!d) + if (!d) { + err = -EINVAL; nvgpu_err(g, "No debugfs?"); + goto free_firmware; + } #endif - gk20a_dbg_fn("done"); err = gp106_bios_devinit(g); if (err) { nvgpu_err(g, "devinit failed"); - return err; + goto free_debugfs; } if (nvgpu_is_enabled(g, NVGPU_PMU_RUN_PREOS)) { err = gp106_bios_preos(g); if (err) { nvgpu_err(g, "pre-os failed"); - return err; + goto free_debugfs; } } + g->bios_is_init = true; return 0; +free_debugfs: +#ifdef CONFIG_DEBUG_FS + debugfs_remove(d); +#endif +free_firmware: + if (g->bios.data) + nvgpu_vfree(g, g->bios.data); + return err; } + diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 5c0019ae..8915b722 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -160,7 +160,7 @@ struct nvgpu_semaphore_sea { * Semaphore sea functions. */ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *gk20a); -void gk20a_semaphore_sea_destroy(struct gk20a *g); +void nvgpu_semaphore_sea_destroy(struct gk20a *g); int nvgpu_semaphore_sea_map(struct nvgpu_semaphore_pool *sea, struct vm_gk20a *vm); void nvgpu_semaphore_sea_unmap(struct nvgpu_semaphore_pool *sea, -- cgit v1.2.2