From c116522b10a253f68dab8bf89c34a8b9e1be51b9 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Tue, 8 Nov 2016 11:31:05 -0800 Subject: gpu: nvgpu: Handle driver shutdown more gracefully Handle possible asynchronous GPU driver shutdown more gracefully. This occurs when the GPU disappears from the PCI bus, for example, if it overheats or detects an overcurrent event. Also add a preprocessor check to make sure that gk20a_channel_cancel_pending_sema_waits() is always defined. In some builds CONFIG_SYNC is disabled, but the gk20a_remove_support() code does not check for this. Bug 1816516 Bug 1807277 Change-Id: I932e312291c5c6a6ac5e13525ce8ca56a1be3652 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1250028 (cherry picked from commit 337810f8c478238a38d8553c1492622d5fa9aafa) Reviewed-on: http://git-master/r/1274476 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 7 +++++++ drivers/gpu/nvgpu/gk20a/gk20a.c | 8 ++++++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/pci.c | 20 ++++++++++++++++++-- 4 files changed, 34 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 063a5457..451b207b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -103,6 +103,13 @@ struct gk20a_channel_sync { void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); + +#ifdef CONFIG_SYNC void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); +#else +static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g) +{ +} +#endif #endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index a65ca6c2..bed728cf 100644 ---
a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -47,6 +47,8 @@ #include "nvgpu_common.h" #include "debug_gk20a.h" #include "ctrl_gk20a.h" +#include "channel_sync_gk20a.h" + #include "hw_mc_gk20a.h" #include "hw_timer_gk20a.h" #include "hw_bus_gk20a.h" @@ -66,6 +68,7 @@ #include "pstate/pstate.h" #endif + #define CREATE_TRACE_POINTS #include @@ -708,6 +711,10 @@ void gk20a_remove_support(struct device *dev) if (g->dbg_regops_tmp_buf) kfree(g->dbg_regops_tmp_buf); + nvgpu_wait_for_deferred_interrupts(g); + + gk20a_channel_cancel_pending_sema_waits(g); + if (g->pmu.remove_support) g->pmu.remove_support(&g->pmu); @@ -1740,6 +1747,7 @@ void gk20a_busy_noresume(struct device *dev) */ void gk20a_driver_start_unload(struct gk20a *g) { + gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n"); g->driver_is_dying = 1; } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index e8c1b30f..dc130e33 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -1081,6 +1081,7 @@ enum gk20a_dbg_categories { gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */ gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ gpu_dbg_xv = BIT(18), /* XVE debugging */ + gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ }; diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 0ba36a30..7cc058e4 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c @@ -356,10 +356,25 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev); struct gk20a *g = get_gk20a(&pdev->dev); - if (g->remove_support) - g->remove_support(g->dev); + gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n"); + gk20a_driver_start_unload(g); + + disable_irq(g->irq_stall); + devm_free_irq(&pdev->dev, g->irq_stall, g); + gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n"); + + /* + * Wait for 
the driver to finish up all the IOCTLs it's working on + * before cleaning up the driver's data structures. + */ + gk20a_wait_for_idle(&pdev->dev); + gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n"); gk20a_user_deinit(g->dev, &nvgpu_pci_class); + gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\n"); + + if (g->remove_support) + g->remove_support(g->dev); debugfs_remove_recursive(platform->debugfs); debugfs_remove_recursive(platform->debugfs_alias); @@ -368,6 +383,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) if (platform->remove) platform->remove(g->dev); + gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\n"); kfree(g); } -- cgit v1.2.2