diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-11-28 15:48:37 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-12-19 18:40:08 -0500 |
commit | 9e46d3731e5ce555bdc46410220bc5a6d8db6067 (patch) | |
tree | a74d8948bba68c94dad505645236a13dda149eaf /drivers | |
parent | 274e1881af2e4c327fda5e28eb804fe304a2f36e (diff) |
gpu: nvgpu: Check for dead GPU
Check if the GPU is present after each register read. If a register
read returns 0xffffffff then it's possible the GPU has fallen off the
bus for some reason or another. However, to confirm that such a read
is due to a dead GPU rather than 0xffffffff being returned by happenstance,
the chip ID register, which should never return 0xffffffff, is read. If
that read returns 0xffffffff as well then the GPU is certainly dead.
Bug 1805082
Bug 1816516
Bug 1807277
Change-Id: I4de61b56289217d9c0d8167e84615a67c8bde8a9
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1239518
(cherry picked from commit bd50828de20aba9b2887ee99c2269602c21a793f)
Reviewed-on: http://git-master/r/1261916
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 7 |
2 files changed, 15 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 156d33ed..b921be7c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -262,6 +262,14 @@ static const struct file_operations gk20a_sched_ops = { | |||
262 | .read = gk20a_sched_dev_read, | 262 | .read = gk20a_sched_dev_read, |
263 | }; | 263 | }; |
264 | 264 | ||
265 | void __nvgpu_check_gpu_state(struct gk20a *g) | ||
266 | { | ||
267 | u32 boot_0 = readl(g->regs + mc_boot_0_r()); | ||
268 | |||
269 | if (boot_0 == 0xffffffff) | ||
270 | pr_err("nvgpu: GPU has disappeared from bus!!\n"); | ||
271 | } | ||
272 | |||
265 | static inline void sim_writel(struct gk20a *g, u32 r, u32 v) | 273 | static inline void sim_writel(struct gk20a *g, u32 r, u32 v) |
266 | { | 274 | { |
267 | writel(v, g->sim.regs+r); | 275 | writel(v, g->sim.regs+r); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 144cb37d..a4cbb4b2 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1111,6 +1111,8 @@ void gk20a_init_clk_ops(struct gpu_ops *gops); | |||
1111 | int gk20a_lockout_registers(struct gk20a *g); | 1111 | int gk20a_lockout_registers(struct gk20a *g); |
1112 | int gk20a_restore_registers(struct gk20a *g); | 1112 | int gk20a_restore_registers(struct gk20a *g); |
1113 | 1113 | ||
1114 | void __nvgpu_check_gpu_state(struct gk20a *g); | ||
1115 | |||
1114 | static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) | 1116 | static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) |
1115 | { | 1117 | { |
1116 | gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); | 1118 | gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); |
@@ -1120,7 +1122,12 @@ static inline void gk20a_writel(struct gk20a *g, u32 r, u32 v) | |||
1120 | static inline u32 gk20a_readl(struct gk20a *g, u32 r) | 1122 | static inline u32 gk20a_readl(struct gk20a *g, u32 r) |
1121 | { | 1123 | { |
1122 | u32 v = readl(g->regs + r); | 1124 | u32 v = readl(g->regs + r); |
1125 | |||
1126 | if (v == 0xffffffff) | ||
1127 | __nvgpu_check_gpu_state(g); | ||
1128 | |||
1123 | gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); | 1129 | gk20a_dbg(gpu_dbg_reg, " r=0x%x v=0x%x", r, v); |
1130 | |||
1124 | return v; | 1131 | return v; |
1125 | } | 1132 | } |
1126 | static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v) | 1133 | static inline void gk20a_writel_check(struct gk20a *g, u32 r, u32 v) |