From 9d2e50de426ac6362d66f9ccb29a0415322e467f Mon Sep 17 00:00:00 2001 From: Kary Jin Date: Mon, 15 Oct 2018 15:57:52 +0800 Subject: gpu: nvgpu: Add reboot handler Add a reboot handler to make sure that nvgpu does not try to busy the GPU if the system is going down. If the system is going down then any number of subsystems nvgpu depends on may already have been deinitialized. Bug 200333709 Bug 200454316 Change-Id: I2ceaf7ca4fb88643310874b5b26937ef44c6e3dd Signed-off-by: Kary Jin Reviewed-on: https://git-master.nvidia.com/r/1927018 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Vinayak Pane Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.c | 12 ---------- drivers/gpu/nvgpu/include/nvgpu/enabled.h | 1 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 8 ++++++- drivers/gpu/nvgpu/os/linux/module.c | 38 +++++++++++++++++++++++++++++-- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 7855493d..9ee5f282 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -424,18 +424,6 @@ done: return err; } -/* - * Check if the device can go busy. Basically if the driver is currently - * in the process of dying then do not let new places make the driver busy. - */ -int gk20a_can_busy(struct gk20a *g) -{ - if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { - return 0; - } - return 1; -} - int gk20a_wait_for_idle(struct gk20a *g) { int wait_length = 150; /* 3 second overall max wait. */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 074be0c3..12acf353 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -36,6 +36,7 @@ struct gk20a; #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 #define NVGPU_FECS_TRACE_VA 4 #define NVGPU_CAN_RAILGATE 5 +#define NVGPU_KERNEL_IS_DYING 6 /* * ECC flags diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index a256b01f..0424e74d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -49,6 +49,9 @@ struct nvgpu_gpu_ctxsw_trace_filter; struct priv_cmd_entry; struct nvgpu_gpfifo_args; +#ifdef __KERNEL__ +#include +#endif #include #include @@ -1411,6 +1414,10 @@ struct gk20a { */ unsigned long *enabled_flags; +#ifdef __KERNEL__ + struct notifier_block nvgpu_reboot_nb; +#endif + nvgpu_atomic_t usage_count; struct nvgpu_mutex ctxsw_disable_lock; @@ -1741,7 +1748,6 @@ void gk20a_idle(struct gk20a *g); int __gk20a_do_idle(struct gk20a *g, bool force_reset); int __gk20a_do_unidle(struct gk20a *g); -int gk20a_can_busy(struct gk20a *g); int gk20a_wait_for_idle(struct gk20a *g); #define NVGPU_GPU_ARCHITECTURE_SHIFT 4 diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 1fd7f544..955481c8 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -76,6 +78,14 @@ #define CREATE_TRACE_POINTS #include +static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, + unsigned long event, void *unused) +{ + struct gk20a *g = container_of(nb, struct gk20a, nvgpu_reboot_nb); + + __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); + return NOTIFY_DONE; +} struct device_node *nvgpu_get_node(struct gk20a *g) { @@ -98,6 +108,22 @@ void gk20a_busy_noresume(struct gk20a *g) pm_runtime_get_noresume(dev_from_gk20a(g)); } +/* + * Check if the device can go busy. + */ +static int nvgpu_can_busy(struct gk20a *g) +{ + /* Can't do anything if the system is rebooting/shutting down. */ + if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING)) + return 0; + + /* Can't do anything if the driver is restarting. */ + if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) + return 0; + + return 1; +} + int gk20a_busy(struct gk20a *g) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); @@ -111,7 +137,7 @@ int gk20a_busy(struct gk20a *g) down_read(&l->busy_lock); - if (!gk20a_can_busy(g)) { + if (!nvgpu_can_busy(g)) { ret = -ENODEV; atomic_dec(&g->usage_count.atomic_var); goto fail; @@ -158,7 +184,7 @@ void gk20a_idle(struct gk20a *g) dev = dev_from_gk20a(g); - if (!(dev && gk20a_can_busy(g))) + if (!(dev && nvgpu_can_busy(g))) return; if (pm_runtime_enabled(dev)) { @@ -1289,6 +1315,12 @@ static int gk20a_probe(struct platform_device *dev) goto return_err; } + gk20a->nvgpu_reboot_nb.notifier_call = + nvgpu_kernel_shutdown_notification; + err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); + if (err) + goto return_err; + return 0; return_err: @@ -1368,6 +1400,8 @@ static int __exit gk20a_remove(struct platform_device *pdev) err = nvgpu_remove(dev, &nvgpu_class); + unregister_reboot_notifier(&g->nvgpu_reboot_nb); + set_gk20a(pdev, NULL); gk20a_put(g); -- cgit v1.2.2