From ea1b69d3f5e419ab1e66340779fda143e8f4ebc3 Mon Sep 17 00:00:00 2001 From: Supriya Date: Thu, 2 Nov 2017 11:54:03 +0530 Subject: gpu: nvgpu: Fix crash on read fail of mc_boot_0_r This CL handles: - Erroneous use of the boot_0 function pointer before it is assigned, in __nvgpu_check_gpu_state. - Proper handling of the error value returned from gk20a_readl in gk20a_mc_boot_0. With these fixes the crash is no longer seen when the mc_boot_0 read returns 0 in gk20a_mc_boot_0. - The recursion caused by mc.boot_0() calling nvgpu_readl, and nvgpu_readl in turn calling mc.boot_0, in case of read failure. Bug 2010966 Change-Id: Ia087811c67d88948b7fc5fff35e0fabc6ea91989 Signed-off-by: Supriya Reviewed-on: https://git-master.nvidia.com/r/1616274 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/io.c | 12 ++++++++++-- drivers/gpu/nvgpu/gk20a/gk20a.c | 12 +++++++++--- drivers/gpu/nvgpu/gk20a/mc_gk20a.c | 5 ++++- drivers/gpu/nvgpu/include/nvgpu/io.h | 1 + 4 files changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c index 729825e7..cde90ddd 100644 --- a/drivers/gpu/nvgpu/common/linux/io.c +++ b/drivers/gpu/nvgpu/common/linux/io.c @@ -32,6 +32,16 @@ void nvgpu_writel(struct gk20a *g, u32 r, u32 v) } u32 nvgpu_readl(struct gk20a *g, u32 r) +{ + u32 v = __nvgpu_readl(g, r); + + if (v == 0xffffffff) + __nvgpu_check_gpu_state(g); + + return v; +} + +u32 __nvgpu_readl(struct gk20a *g, u32 r) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); u32 v = 0xffffffff; @@ -41,8 +51,6 @@ u32 nvgpu_readl(struct gk20a *g, u32 r) gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v); } else { v = readl(l->regs + r); - if (v == 0xffffffff) - __nvgpu_check_gpu_state(g); gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index c72b6193..0ccc8f6c 100644 --- 
a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -54,11 +54,17 @@ void __nvgpu_check_gpu_state(struct gk20a *g) { - u32 boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); + u32 boot_0 = 0xffffffff; + if (!g->ops.mc.boot_0) { + nvgpu_err(g, "Can't determine GPU state, mc.boot_0 unset"); + return; + } + + boot_0 = g->ops.mc.boot_0(g, NULL, NULL, NULL); if (boot_0 == 0xffffffff) { - pr_err("nvgpu: GPU has disappeared from bus!!\n"); - pr_err("nvgpu: Rebooting system!!\n"); + nvgpu_err(g, "GPU has disappeared from bus!!"); + nvgpu_err(g, "Rebooting system!!"); kernel_restart(NULL); } } diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 9d9256bd..5027eaa4 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c @@ -200,7 +200,10 @@ void gk20a_mc_reset(struct gk20a *g, u32 units) u32 gk20a_mc_boot_0(struct gk20a *g, u32 *arch, u32 *impl, u32 *rev) { - u32 val = gk20a_readl(g, mc_boot_0_r()); + u32 val = __nvgpu_readl(g, mc_boot_0_r()); + + if (val == 0xffffffff) + return val; if (arch) *arch = mc_boot_0_architecture_v(val) << diff --git a/drivers/gpu/nvgpu/include/nvgpu/io.h b/drivers/gpu/nvgpu/include/nvgpu/io.h index 94ae8f95..b7281b41 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/io.h +++ b/drivers/gpu/nvgpu/include/nvgpu/io.h @@ -40,6 +40,7 @@ struct gk20a; void nvgpu_writel(struct gk20a *g, u32 r, u32 v); u32 nvgpu_readl(struct gk20a *g, u32 r); +u32 __nvgpu_readl(struct gk20a *g, u32 r); void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v); void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v); u32 nvgpu_bar1_readl(struct gk20a *g, u32 b); -- cgit v1.2.2