diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-11-08 14:31:05 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-04 18:53:56 -0500 |
commit | c116522b10a253f68dab8bf89c34a8b9e1be51b9 (patch) | |
tree | 300c02e23d4dbf3d989e9bffd9ae77ea4fbf7d43 | |
parent | 9e2f7d98d4cf2845d3dfea1653f3d6bedd4fb1e6 (diff) |
gpu: nvgpu: Handle driver shutdown more gracefully
Handle possible asynchronous GPU driver shutdown more gracefully.
This occurs when the GPU disappears from the PCI bus, for example,
if it overheats or detects an overcurrent event.
Also add a preprocessor check to make sure that
gk20a_channel_cancel_pending_sema_waits()
is always defined. In some builds CONFIG_SYNC is disabled, but the
gk20a_remove_support() code does not check for this.
Bug 1816516
Bug 1807277
Change-Id: I932e312291c5c6a6ac5e13525ce8ca56a1be3652
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1250028
(cherry picked from commit 337810f8c478238a38d8553c1492622d5fa9aafa)
Reviewed-on: http://git-master/r/1274476
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/pci.c | 20 |
4 files changed, 34 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 063a5457..451b207b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -103,6 +103,13 @@ struct gk20a_channel_sync { | |||
103 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); | 103 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); |
104 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); | 104 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); |
105 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); | 105 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); |
106 | |||
107 | #ifdef CONFIG_SYNC | ||
106 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); | 108 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); |
109 | #else | ||
110 | static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g) | ||
111 | { | ||
112 | } | ||
113 | #endif | ||
107 | 114 | ||
108 | #endif | 115 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index a65ca6c2..bed728cf 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -47,6 +47,8 @@ | |||
47 | #include "nvgpu_common.h" | 47 | #include "nvgpu_common.h" |
48 | #include "debug_gk20a.h" | 48 | #include "debug_gk20a.h" |
49 | #include "ctrl_gk20a.h" | 49 | #include "ctrl_gk20a.h" |
50 | #include "channel_sync_gk20a.h" | ||
51 | |||
50 | #include "hw_mc_gk20a.h" | 52 | #include "hw_mc_gk20a.h" |
51 | #include "hw_timer_gk20a.h" | 53 | #include "hw_timer_gk20a.h" |
52 | #include "hw_bus_gk20a.h" | 54 | #include "hw_bus_gk20a.h" |
@@ -66,6 +68,7 @@ | |||
66 | #include "pstate/pstate.h" | 68 | #include "pstate/pstate.h" |
67 | #endif | 69 | #endif |
68 | 70 | ||
71 | |||
69 | #define CREATE_TRACE_POINTS | 72 | #define CREATE_TRACE_POINTS |
70 | #include <trace/events/gk20a.h> | 73 | #include <trace/events/gk20a.h> |
71 | 74 | ||
@@ -708,6 +711,10 @@ void gk20a_remove_support(struct device *dev) | |||
708 | if (g->dbg_regops_tmp_buf) | 711 | if (g->dbg_regops_tmp_buf) |
709 | kfree(g->dbg_regops_tmp_buf); | 712 | kfree(g->dbg_regops_tmp_buf); |
710 | 713 | ||
714 | nvgpu_wait_for_deferred_interrupts(g); | ||
715 | |||
716 | gk20a_channel_cancel_pending_sema_waits(g); | ||
717 | |||
711 | if (g->pmu.remove_support) | 718 | if (g->pmu.remove_support) |
712 | g->pmu.remove_support(&g->pmu); | 719 | g->pmu.remove_support(&g->pmu); |
713 | 720 | ||
@@ -1740,6 +1747,7 @@ void gk20a_busy_noresume(struct device *dev) | |||
1740 | */ | 1747 | */ |
1741 | void gk20a_driver_start_unload(struct gk20a *g) | 1748 | void gk20a_driver_start_unload(struct gk20a *g) |
1742 | { | 1749 | { |
1750 | gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n"); | ||
1743 | g->driver_is_dying = 1; | 1751 | g->driver_is_dying = 1; |
1744 | } | 1752 | } |
1745 | 1753 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index e8c1b30f..dc130e33 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1081,6 +1081,7 @@ enum gk20a_dbg_categories { | |||
1081 | gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */ | 1081 | gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */ |
1082 | gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ | 1082 | gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ |
1083 | gpu_dbg_xv = BIT(18), /* XVE debugging */ | 1083 | gpu_dbg_xv = BIT(18), /* XVE debugging */ |
1084 | gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ | ||
1084 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ | 1085 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ |
1085 | }; | 1086 | }; |
1086 | 1087 | ||
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 0ba36a30..7cc058e4 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c | |||
@@ -356,10 +356,25 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) | |||
356 | struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev); | 356 | struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev); |
357 | struct gk20a *g = get_gk20a(&pdev->dev); | 357 | struct gk20a *g = get_gk20a(&pdev->dev); |
358 | 358 | ||
359 | if (g->remove_support) | 359 | gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n"); |
360 | g->remove_support(g->dev); | 360 | gk20a_driver_start_unload(g); |
361 | |||
362 | disable_irq(g->irq_stall); | ||
363 | devm_free_irq(&pdev->dev, g->irq_stall, g); | ||
364 | gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n"); | ||
365 | |||
366 | /* | ||
367 | * Wait for the driver to finish up all the IOCTLs it's working on | ||
368 | * before cleaning up the driver's data structures. | ||
369 | */ | ||
370 | gk20a_wait_for_idle(&pdev->dev); | ||
371 | gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n"); | ||
361 | 372 | ||
362 | gk20a_user_deinit(g->dev, &nvgpu_pci_class); | 373 | gk20a_user_deinit(g->dev, &nvgpu_pci_class); |
374 | gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b"); | ||
375 | |||
376 | if (g->remove_support) | ||
377 | g->remove_support(g->dev); | ||
363 | 378 | ||
364 | debugfs_remove_recursive(platform->debugfs); | 379 | debugfs_remove_recursive(platform->debugfs); |
365 | debugfs_remove_recursive(platform->debugfs_alias); | 380 | debugfs_remove_recursive(platform->debugfs_alias); |
@@ -368,6 +383,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) | |||
368 | 383 | ||
369 | if (platform->remove) | 384 | if (platform->remove) |
370 | platform->remove(g->dev); | 385 | platform->remove(g->dev); |
386 | gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\b"); | ||
371 | 387 | ||
372 | kfree(g); | 388 | kfree(g); |
373 | } | 389 | } |