summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Kary Jin <karyj@nvidia.com> 2018-10-15 03:57:52 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-10-29 22:08:48 -0400
commit: 9d2e50de426ac6362d66f9ccb29a0415322e467f (patch)
tree: fd7742b653f232a7e4da5ee8e7cf7338f0b5cddf /drivers
parent: e1c52e46ea383dd280f9d65a4e34422fd9f3637a (diff)
gpu: nvgpu: Add reboot handler
Add a reboot handler to make sure that nvgpu does not try to busy the GPU if the system is going down. If the system is going down then any number of subsystems nvgpu depends on may already have been deinitialized.

Bug 200333709
Bug 200454316

Change-Id: I2ceaf7ca4fb88643310874b5b26937ef44c6e3dd
Signed-off-by: Kary Jin <karyj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1927018
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vinayak Pane <vpane@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 12
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/enabled.h 1
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/gk20a.h 8
-rw-r--r-- drivers/gpu/nvgpu/os/linux/module.c 38
4 files changed, 44 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 7855493d..9ee5f282 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -424,18 +424,6 @@ done:
424 return err; 424 return err;
425} 425}
426 426
427/*
428 * Check if the device can go busy. Basically if the driver is currently
429 * in the process of dying then do not let new places make the driver busy.
430 */
431int gk20a_can_busy(struct gk20a *g)
432{
433 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
434 return 0;
435 }
436 return 1;
437}
438
439int gk20a_wait_for_idle(struct gk20a *g) 427int gk20a_wait_for_idle(struct gk20a *g)
440{ 428{
441 int wait_length = 150; /* 3 second overall max wait. */ 429 int wait_length = 150; /* 3 second overall max wait. */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 074be0c3..12acf353 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -36,6 +36,7 @@ struct gk20a;
36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 36#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
37#define NVGPU_FECS_TRACE_VA 4 37#define NVGPU_FECS_TRACE_VA 4
38#define NVGPU_CAN_RAILGATE 5 38#define NVGPU_CAN_RAILGATE 5
39#define NVGPU_KERNEL_IS_DYING 6
39 40
40/* 41/*
41 * ECC flags 42 * ECC flags
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index a256b01f..0424e74d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -49,6 +49,9 @@ struct nvgpu_gpu_ctxsw_trace_filter;
49struct priv_cmd_entry; 49struct priv_cmd_entry;
50struct nvgpu_gpfifo_args; 50struct nvgpu_gpfifo_args;
51 51
52#ifdef __KERNEL__
53#include <linux/notifier.h>
54#endif
52#include <nvgpu/lock.h> 55#include <nvgpu/lock.h>
53#include <nvgpu/thread.h> 56#include <nvgpu/thread.h>
54 57
@@ -1411,6 +1414,10 @@ struct gk20a {
1411 */ 1414 */
1412 unsigned long *enabled_flags; 1415 unsigned long *enabled_flags;
1413 1416
1417#ifdef __KERNEL__
1418 struct notifier_block nvgpu_reboot_nb;
1419#endif
1420
1414 nvgpu_atomic_t usage_count; 1421 nvgpu_atomic_t usage_count;
1415 1422
1416 struct nvgpu_mutex ctxsw_disable_lock; 1423 struct nvgpu_mutex ctxsw_disable_lock;
@@ -1741,7 +1748,6 @@ void gk20a_idle(struct gk20a *g);
1741int __gk20a_do_idle(struct gk20a *g, bool force_reset); 1748int __gk20a_do_idle(struct gk20a *g, bool force_reset);
1742int __gk20a_do_unidle(struct gk20a *g); 1749int __gk20a_do_unidle(struct gk20a *g);
1743 1750
1744int gk20a_can_busy(struct gk20a *g);
1745int gk20a_wait_for_idle(struct gk20a *g); 1751int gk20a_wait_for_idle(struct gk20a *g);
1746 1752
1747#define NVGPU_GPU_ARCHITECTURE_SHIFT 4 1753#define NVGPU_GPU_ARCHITECTURE_SHIFT 4
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index 1fd7f544..955481c8 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -24,6 +24,8 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/pm_runtime.h> 25#include <linux/pm_runtime.h>
26#include <linux/reset.h> 26#include <linux/reset.h>
27#include <linux/reboot.h>
28#include <linux/notifier.h>
27#include <linux/platform/tegra/common.h> 29#include <linux/platform/tegra/common.h>
28#include <linux/pci.h> 30#include <linux/pci.h>
29 31
@@ -76,6 +78,14 @@
76#define CREATE_TRACE_POINTS 78#define CREATE_TRACE_POINTS
77#include <trace/events/gk20a.h> 79#include <trace/events/gk20a.h>
78 80
81static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
82 unsigned long event, void *unused)
83{
84 struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb);
85
86 __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
87 return NOTIFY_DONE;
88}
79 89
80struct device_node *nvgpu_get_node(struct gk20a *g) 90struct device_node *nvgpu_get_node(struct gk20a *g)
81{ 91{
@@ -98,6 +108,22 @@ void gk20a_busy_noresume(struct gk20a *g)
98 pm_runtime_get_noresume(dev_from_gk20a(g)); 108 pm_runtime_get_noresume(dev_from_gk20a(g));
99} 109}
100 110
111/*
112 * Check if the device can go busy.
113 */
114static int nvgpu_can_busy(struct gk20a *g)
115{
116 /* Can't do anything if the system is rebooting/shutting down. */
117 if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING))
118 return 0;
119
120 /* Can't do anything if the driver is restarting. */
121 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
122 return 0;
123
124 return 1;
125}
126
101int gk20a_busy(struct gk20a *g) 127int gk20a_busy(struct gk20a *g)
102{ 128{
103 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); 129 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
@@ -111,7 +137,7 @@ int gk20a_busy(struct gk20a *g)
111 137
112 down_read(&l->busy_lock); 138 down_read(&l->busy_lock);
113 139
114 if (!gk20a_can_busy(g)) { 140 if (!nvgpu_can_busy(g)) {
115 ret = -ENODEV; 141 ret = -ENODEV;
116 atomic_dec(&g->usage_count.atomic_var); 142 atomic_dec(&g->usage_count.atomic_var);
117 goto fail; 143 goto fail;
@@ -158,7 +184,7 @@ void gk20a_idle(struct gk20a *g)
158 184
159 dev = dev_from_gk20a(g); 185 dev = dev_from_gk20a(g);
160 186
161 if (!(dev && gk20a_can_busy(g))) 187 if (!(dev && nvgpu_can_busy(g)))
162 return; 188 return;
163 189
164 if (pm_runtime_enabled(dev)) { 190 if (pm_runtime_enabled(dev)) {
@@ -1289,6 +1315,12 @@ static int gk20a_probe(struct platform_device *dev)
1289 goto return_err; 1315 goto return_err;
1290 } 1316 }
1291 1317
1318 gk20a->nvgpu_reboot_nb.notifier_call =
1319 nvgpu_kernel_shutdown_notification;
1320 err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);
1321 if (err)
1322 goto return_err;
1323
1292 return 0; 1324 return 0;
1293 1325
1294return_err: 1326return_err:
@@ -1368,6 +1400,8 @@ static int __exit gk20a_remove(struct platform_device *pdev)
1368 1400
1369 err = nvgpu_remove(dev, &nvgpu_class); 1401 err = nvgpu_remove(dev, &nvgpu_class);
1370 1402
1403 unregister_reboot_notifier(&g->nvgpu_reboot_nb);
1404
1371 set_gk20a(pdev, NULL); 1405 set_gk20a(pdev, NULL);
1372 1406
1373 gk20a_put(g); 1407 gk20a_put(g);