From c5f2d00d04f5048e1414f1a2cbe702026528b4db Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 19 Mar 2015 18:59:11 +0530 Subject: gpu: nvgpu: fix deadlock on railgate_lock during race condition We have below race condition during __gk20a_do_idle() and force_reset case : - before execution of __gk20a_do_idle(), a process drops the last usage count of GPU, which triggers GPU railgate process - but before GPU is really railgated (there is 500 mS delay), some process calls __gk20a_do_idle() - in __gk20a_do_idle(), we first take railgate_lock - then we check if GPU is already railgated or not - since it is not railgated yet (due to 500 mS delay), this returns false - then we call pm_runtime_get_noresume() which just increases the usage counter - in this particular case, this call just increases usage count to 1 from 0, but whereas GPU is already on its way to railgate - while we check if GPU usage count drops to one, GPU gets railgated - now if we have force_reset=true case, we will end up calling pm_runtime_get_sync() which will take railgate_lock lock _again_ and try to unrailgate GPU - this causes a deadlock on railgate_lock To fix this, use below sequence : - take railgate_lock - check if GPU is already railgated - release railgate_lock - call pm_runtime_get_sync() which will keep GPU active even if railgating is already triggered - take railgate_lock again to prevent unrailgate in futher process Also, add more descriptive comments to explain the flow Bug 1624537 Change-Id: I0febc65d7bfac03ee738be200cf321322ffbe5a6 Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/719625 (cherry picked from commit 480284eda16e2b50ee6368bad3d15574e098b231) Reviewed-on: http://git-master/r/719620 Reviewed-by: Sachin Nikam --- drivers/gpu/nvgpu/gk20a/gk20a.c | 63 ++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index eb6774da..6add8441 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1774,6 +1774,10 @@ void gk20a_reset(struct gk20a *g, u32 units) * __gk20a_do_idle() - force the GPU to idle and railgate * * In success, this call MUST be balanced by caller with __gk20a_do_unidle() + * + * Acquires two locks : &g->busy_lock and &platform->railgate_lock + * In success, we hold these locks and return + * In failure, we release these locks and return */ int __gk20a_do_idle(struct platform_device *pdev, bool force_reset) { @@ -1794,15 +1798,15 @@ int __gk20a_do_idle(struct platform_device *pdev, bool force_reset) if (platform->is_railgated(pdev)) return 0; - /* check if global force_reset flag is set */ - force_reset |= platform->force_reset_in_do_idle; - - /* prevent suspend by incrementing usage counter */ - pm_runtime_get_noresume(&pdev->dev); + /* + * release railgate_lock, prevent suspend by incrementing usage counter, + * re-acquire railgate_lock + */ + mutex_unlock(&platform->railgate_lock); + pm_runtime_get_sync(&pdev->dev); + mutex_lock(&platform->railgate_lock); /* check and wait until GPU is idle (with a timeout) */ - pm_runtime_barrier(&pdev->dev); - do { msleep(1); ref_cnt = atomic_read(&pdev->dev.power.usage_count); @@ -1811,19 +1815,21 @@ int __gk20a_do_idle(struct platform_device *pdev, bool force_reset) if (ref_cnt != 1) { gk20a_err(&pdev->dev, "failed to idle - refcount %d != 1\n", ref_cnt); - goto fail; + goto fail_drop_usage_count; } - /* - * if GPU is now idle, we will have only one ref count - * drop this ref which will rail gate the GPU (if GPU - * railgate is supported) - * if GPU railgate is not supported then we need to - * explicitly reset it - */ - pm_runtime_put_sync(&pdev->dev); + /* check if global force_reset flag is set */ + force_reset |= platform->force_reset_in_do_idle; if (platform->can_railgate && !force_reset) { + /* + * Case 1 : GPU railgate is supported + * + * if GPU is now idle, we will have only one ref count, + * drop this ref which will rail gate the GPU + */ + pm_runtime_put_sync(&pdev->dev); + /* add sufficient delay to allow GPU to rail gate */ msleep(platform->railgate_delay); @@ -1842,12 +1848,24 @@ int __gk20a_do_idle(struct platform_device *pdev, bool force_reset) goto fail_timeout; } } else { + /* + * Case 2 : GPU railgate is not supported or we explicitly + * do not want to do railgate + * + * if GPU is now idle, call prepare_poweroff() to save the + * state and then assert the reset + * + * __gk20a_do_unidle() needs to deassert reset, call + * finalize_poweron(), and then call pm_runtime_put_sync() + * to balance the GPU usage counter + */ if (!platform->reset_assert || !platform->reset_deassert) - goto fail_timeout; + goto fail_drop_usage_count; - pm_runtime_get_sync(&pdev->dev); + /* Save the GPU state */ gk20a_pm_prepare_poweroff(&pdev->dev); + /* assert GPU reset */ platform->reset_assert(pdev); udelay(10); @@ -1856,7 +1874,7 @@ int __gk20a_do_idle(struct platform_device *pdev, bool force_reset) return 0; } -fail: +fail_drop_usage_count: pm_runtime_put_noidle(&pdev->dev); fail_timeout: mutex_unlock(&platform->railgate_lock); @@ -1892,9 +1910,16 @@ int __gk20a_do_unidle(struct platform_device *pdev) struct gk20a_platform *platform = dev_get_drvdata(&pdev->dev); if (g->forced_reset) { + /* + * If we did a reset (and not railgate), + * then deassert the GPU reset here first + */ platform->reset_deassert(pdev); + /* restore the GPU state */ gk20a_pm_finalize_poweron(&pdev->dev); + + /* balance GPU usage counter */ pm_runtime_put_sync(&pdev->dev); g->forced_reset = false; -- cgit v1.2.2