From ef6ea3475cac013c174905ab4f7f187700ae2a33 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Thu, 7 Sep 2017 16:12:44 -0700 Subject: gpu: nvgpu: Unify remove/shutdown codepaths The following changes are part of the porting of the bind/unbind functionality. These changes reuse the shutdown codepaths in iGPU and dGPU and fix a locking issue in gk20a_busy() where the usage count can lead to a deadlock during the driver shutdown. It fixes a race condition with the gr/mm code by invalidating the sw ready flag while holding the busy lock. JIRA: EVLR-1739 Change-Id: I62ce47378436b21f447f4cd93388759ed3f9bad1 Signed-off-by: David Nieto Reviewed-on: https://git-master.nvidia.com/r/1554959 Reviewed-by: svc-mobile-coverity Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/common/linux/module.c | 108 +++++++++++++++++++------------- drivers/gpu/nvgpu/common/linux/module.h | 2 + drivers/gpu/nvgpu/common/linux/pci.c | 56 ++++++----------- 3 files changed, 87 insertions(+), 79 deletions(-) (limited to 'drivers/gpu/nvgpu/common/linux') diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 509930c7..46b89ad0 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -640,6 +640,48 @@ static int gk20a_pm_unrailgate(struct device *dev) return ret; } +/* + * Idle the GPU in preparation of shutdown/remove. + * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW + * state to prevent further activity on the driver SW side. 
+ * On driver removal quiesce() should be called after start_unload() + */ +int nvgpu_quiesce(struct gk20a *g) +{ + int err; + struct device *dev = dev_from_gk20a(g); + + err = gk20a_wait_for_idle(g); + if (err) { + nvgpu_err(g, "failed to idle GPU, err=%d", err); + return err; + } + + err = gk20a_fifo_disable_all_engine_activity(g, true); + if (err) { + nvgpu_err(g, "failed to disable engine activity, err=%d", + err); + return err; + } + + err = gk20a_fifo_wait_engine_idle(g); + if (err) { + nvgpu_err(g, "failed to idle engines, err=%d", + err); + return err; + } + + if (gk20a_gpu_is_virtual(dev)) + err = vgpu_pm_prepare_poweroff(dev); + else + err = gk20a_pm_prepare_poweroff(dev); + + if (err) + nvgpu_err(g, "failed to prepare for poweroff, err=%d", + err); + return err; +} + static void gk20a_pm_shutdown(struct platform_device *pdev) { struct gk20a_platform *platform = platform_get_drvdata(pdev); @@ -668,35 +710,9 @@ static void gk20a_pm_shutdown(struct platform_device *pdev) /* Prevent more requests by disabling Runtime PM */ __pm_runtime_disable(&pdev->dev, false); - err = gk20a_wait_for_idle(g); - if (err) { - nvgpu_err(g, "failed to idle GPU, err=%d", err); - goto finish; - } - - err = gk20a_fifo_disable_all_engine_activity(g, true); - if (err) { - nvgpu_err(g, "failed to disable engine activity, err=%d", - err); - goto finish; - } - - err = gk20a_fifo_wait_engine_idle(g); - if (err) { - nvgpu_err(g, "failed to idle engines, err=%d", - err); - goto finish; - } - - if (gk20a_gpu_is_virtual(&pdev->dev)) - err = vgpu_pm_prepare_poweroff(&pdev->dev); - else - err = gk20a_pm_prepare_poweroff(&pdev->dev); - if (err) { - nvgpu_err(g, "failed to prepare for poweroff, err=%d", - err); + err = nvgpu_quiesce(g); + if (err) goto finish; - } err = gk20a_pm_railgate(&pdev->dev); if (err) @@ -854,6 +870,9 @@ void gk20a_driver_start_unload(struct gk20a *g) down_write(&g->busy_lock); __nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); + /* GR SW ready needs to be 
invalidated at this time with the busy lock + * held to prevent a racing condition on the gr/mm code */ + g->gr.sw_ready = false; up_write(&g->busy_lock); if (g->is_virtual) @@ -979,18 +998,14 @@ static int gk20a_probe(struct platform_device *dev) return 0; } -static int __exit gk20a_remove(struct platform_device *pdev) +int nvgpu_remove(struct device *dev, struct class *class) { - struct device *dev = &pdev->dev; struct gk20a *g = get_gk20a(dev); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct gk20a_platform *platform = gk20a_get_platform(dev); gk20a_dbg_fn(""); - if (gk20a_gpu_is_virtual(dev)) - return vgpu_remove(pdev); - if (platform->has_cde) gk20a_cde_destroy(l); @@ -1001,16 +1016,11 @@ static int __exit gk20a_remove(struct platform_device *pdev) if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) gk20a_scale_exit(dev); - if (g->remove_support) - g->remove_support(g); - - gk20a_ce_destroy(g); - #ifdef CONFIG_ARCH_TEGRA_18x_SOC nvgpu_clk_arb_cleanup_arbiter(g); #endif - gk20a_user_deinit(dev, &nvgpu_class); + gk20a_user_deinit(dev, class); gk20a_debug_deinit(g); @@ -1026,14 +1036,28 @@ static int __exit gk20a_remove(struct platform_device *pdev) if (platform->remove) platform->remove(dev); - set_gk20a(pdev, NULL); - gk20a_put(g); - gk20a_dbg_fn("removed"); return 0; } +static int __exit gk20a_remove(struct platform_device *pdev) +{ + int err; + struct device *dev = &pdev->dev; + struct gk20a *g = get_gk20a(dev); + + if (gk20a_gpu_is_virtual(dev)) + return vgpu_remove(pdev); + + err = nvgpu_remove(dev, &nvgpu_class); + + set_gk20a(pdev, NULL); + gk20a_put(g); + + return err; +} + static struct platform_driver gk20a_driver = { .probe = gk20a_probe, .remove = __exit_p(gk20a_remove), diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h index cfbbc0c7..def98288 100644 --- a/drivers/gpu/nvgpu/common/linux/module.h +++ b/drivers/gpu/nvgpu/common/linux/module.h @@ -19,6 +19,8 @@ struct device; int 
gk20a_pm_finalize_poweron(struct device *dev); void gk20a_remove_support(struct gk20a *g); void gk20a_driver_start_unload(struct gk20a *g); +int nvgpu_quiesce(struct gk20a *g); +int nvgpu_remove(struct device *dev, struct class *class); extern struct class nvgpu_class; diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 4ea86e7f..f1d12367 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c @@ -513,52 +513,34 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, static void nvgpu_pci_remove(struct pci_dev *pdev) { - struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev); struct gk20a *g = get_gk20a(&pdev->dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + int err; - gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n"); + /* no support yet for unbind if DGPU is in VGPU mode */ + if (gk20a_gpu_is_virtual(dev)) + return; - if (g->irqs_enabled) - disable_irq(g->irq_stall); + /* only idle the GPU if the GPU is powered on */ + if (g->power_on) { + gk20a_driver_start_unload(g); + err = nvgpu_quiesce(g); + /* TODO: handle failure to idle */ + WARN(err, "gpu failed to idle during driver removal"); + } - devm_free_irq(&pdev->dev, g->irq_stall, g); + nvgpu_remove(dev, &nvgpu_pci_class); #if defined(CONFIG_PCI_MSI) - if (g->msi_enabled) { + if (g->msi_enabled) pci_disable_msi(pdev); - g->msi_enabled = false; + else { + /* IRQ does not need to be enabled in MSI as the line is not + * shared + */ + enable_irq(g->irq_stall); } #endif - gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n"); - - /* - * Wait for the driver to finish up all the IOCTLs it's working on - * before cleaning up the driver's data structures. 
- */ - gk20a_driver_start_unload(g); - gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n"); - -#ifdef CONFIG_ARCH_TEGRA_18x_SOC - nvgpu_clk_arb_cleanup_arbiter(g); -#endif - - gk20a_user_deinit(dev_from_gk20a(g), &nvgpu_pci_class); - gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b"); - -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(l->debugfs); - debugfs_remove_recursive(l->debugfs_alias); -#endif - - nvgpu_remove_sysfs(dev_from_gk20a(g)); - - if (platform->remove) - platform->remove(dev_from_gk20a(g)); - gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\b"); - - enable_irq(g->irq_stall); - gk20a_get_platform(&pdev->dev)->g = NULL; gk20a_put(g); } -- cgit v1.2.2