From 47f6bc0c2e85d0a8ff943b88c81108ca1bfc588e Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Wed, 17 Jul 2019 17:23:42 +0530 Subject: gpu: nvgpu: Fix the race between runtime PM and L2 flush gk20a_mm_l2_flush flushes the L2 cache when "struct gk20a->power_on" is true. But it doesn't acquire the power lock when doing that, which creates a race in which runtime PM might suspend the GPU in the middle of an L2 flush. The FB flush appears to have the same issue as the L2 flush. This patch fixes that by calling pm_runtime_get_if_in_use at the beginning of the ioctl. This PM API does a compare-and-add on the usage count. If the device was not in use, it simply returns without incrementing the usage count, because it is unnecessary to wake up the GPU (e.g. with a call to gk20a_busy()): the caches are flushed when the device is resumed anyway. Bug 2643951 Change-Id: I2417f7ca3223c722dcb4d9057d32a7e065b9e574 Signed-off-by: Debarshi Dutta Reviewed-on: https://git-master.nvidia.com/r/2151532 GVS: Gerrit_Virtual_Submit Reviewed-by: Mark Zhang Reviewed-by: Bibek Basu Reviewed-by: Deepak Nibade Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 1 + drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 13 +++++++++++-- drivers/gpu/nvgpu/os/linux/module.c | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index f393e799..7ed4c714 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1747,6 +1747,7 @@ enum { #define GK20A_SIM_IORESOURCE_MEM 2 void gk20a_busy_noresume(struct gk20a *g); +int gk20a_busy_try_noresume(struct gk20a *g); void gk20a_idle_nosuspend(struct gk20a *g); int __must_check gk20a_busy(struct gk20a *g); void gk20a_idle(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 227a7d57..3ccc6b0a 100644 --- 
a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -568,19 +568,28 @@ static int gk20a_ctrl_get_fbp_l2_masks( static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, struct nvgpu_gpu_l2_fb_args *args) { - int err = 0; + int ret = 0; if ((!args->l2_flush && !args->fb_flush) || (!args->l2_flush && args->l2_invalidate)) return -EINVAL; + ret = gk20a_busy_try_noresume(g); + + /* return if device is already powered off */ + if (ret == 0) + return 0; + if (args->l2_flush) g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); if (args->fb_flush) g->ops.mm.fb_flush(g); - return err; + if (ret > 0) + gk20a_idle_nosuspend(g); + + return 0; } static int nvgpu_gpu_ioctl_set_mmu_debug_mode( diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index c453bced..964fc651 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -109,6 +109,11 @@ void gk20a_busy_noresume(struct gk20a *g) pm_runtime_get_noresume(dev_from_gk20a(g)); } +int gk20a_busy_try_noresume(struct gk20a *g) +{ + return pm_runtime_get_if_in_use(dev_from_gk20a(g)); +} + /* * Check if the device can go busy. */ -- cgit v1.2.2