From aa7ee8dac0cb29ee3244c7eef77eac8e7fc34dba Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 27 Mar 2018 11:52:27 -0700 Subject: gpu: nvgpu: enhance pbus error reporting -Dump timeout save0 and save1 even if they could be unreliable when fecs_tgt is set in save0. This is good to have for debug purposes. -Add priv_ring hal for decode_error_code -Decode fecs error code for supported error types Bug 1998067 Change-Id: I60cb6902d099df4a7df45fa624e44d9e0d46360f Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1683014 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 44 ++++++++++++++++++------------- drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c | 9 ++++--- drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.h | 4 ++- drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 + 8 files changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index b2800772..60481544 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -65,7 +65,7 @@ void gk20a_bus_init_hw(struct gk20a *g) void gk20a_bus_isr(struct gk20a *g) { - u32 val, save0, save1, err_code; + u32 val, save0, save1, fecs_errcode = 0; val = gk20a_readl(g, bus_intr_0_r()); @@ -78,29 +78,37 @@ void gk20a_bus_isr(struct gk20a *g) save0 = gk20a_readl(g, timer_pri_timeout_save_0_r()); if (timer_pri_timeout_save_0_fecs_tgt_v(save0)) { - - err_code = gk20a_readl(g, + /* + * write & addr fields in timeout_save0 + * might not be reliable + */ + fecs_errcode = gk20a_readl(g, timer_pri_timeout_fecs_errcode_r()); - /* write and addr fields are not reliable */ - nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x " - "FECS_ERRCODE 0x%08x", val, err_code); + } + + save1 = gk20a_readl(g, timer_pri_timeout_save_1_r()); + nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x ADR 0x%08x " + "%s DATA 0x%08x ", + val, + timer_pri_timeout_save_0_addr_v(save0) << 2, + timer_pri_timeout_save_0_write_v(save0) ? + "WRITE" : "READ", save1); - if ((err_code & 0xffffff00) == 0xbadf1300) + gk20a_writel(g, timer_pri_timeout_save_0_r(), 0); + gk20a_writel(g, timer_pri_timeout_save_1_r(), 0); + + if (fecs_errcode) { + nvgpu_err(g, "FECS_ERRCODE 0x%08x", fecs_errcode); + if (g->ops.priv_ring.decode_error_code) + g->ops.priv_ring.decode_error_code(g, + fecs_errcode); + + if ((fecs_errcode & 0xffffff00) == 0xbadf1300) nvgpu_err(g, "NV_PGRAPH_PRI_GPC0_GPCCS_FS_GPC: " "0x%08x", gk20a_readl(g, gr_gpc0_fs_gpc_r())); - } else { - save1 = gk20a_readl(g, timer_pri_timeout_save_1_r()); - nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x ADR 0x%08x " - "R/W %s DATA 0x%08x", - val, - timer_pri_timeout_save_0_addr_v(save0) << 2, - timer_pri_timeout_save_0_write_v(save0) ? 
- "WRITE" : "READ", save1); } - gk20a_writel(g, timer_pri_timeout_save_0_r(), 0); - gk20a_writel(g, timer_pri_timeout_save_1_r(), 0); } else { nvgpu_err(g, "Unhandled NV_PBUS_INTR_0: 0x%08x", val); } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 95736d30..7162dc73 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -1113,6 +1113,7 @@ struct gpu_ops { } falcon; struct { void (*isr)(struct gk20a *g); + void (*decode_error_code)(struct gk20a *g, u32 error_code); } priv_ring; struct { int (*check_priv_security)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index dbea8033..bfd1e99a 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -49,6 +49,7 @@ #include "gp10b/fb_gp10b.h" #include "gp10b/pmu_gp10b.h" #include "gp10b/gr_gp10b.h" +#include "gp10b/priv_ring_gp10b.h" #include "gp106/fifo_gp106.h" #include "gp106/regops_gp106.h" @@ -729,6 +730,7 @@ static const struct gpu_ops gp106_ops = { }, .priv_ring = { .isr = gp10b_priv_ring_isr, + .decode_error_code = gp10b_priv_ring_decode_error_code, }, .fuse = { .check_priv_security = gp106_fuse_check_priv_security, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index dd413c5a..a6a66f93 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -646,6 +646,7 @@ static const struct gpu_ops gp10b_ops = { }, .priv_ring = { .isr = gp10b_priv_ring_isr, + .decode_error_code = gp10b_priv_ring_decode_error_code, }, .fuse = { .check_priv_security = gp10b_fuse_check_priv_security, diff --git a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c index 0fac76f2..12cd2a84 100644 --- a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c @@ -64,7 +64,7 @@ static const char *const error_type_badf5xyy[] = { "pri route error" }; 
-static void gp10b_priv_ring_decode_error_code(struct gk20a *g, +void gp10b_priv_ring_decode_error_code(struct gk20a *g, u32 error_code) { u32 error_type, error_type_index; @@ -141,7 +141,8 @@ void gp10b_priv_ring_isr(struct gk20a *g) pri_ringstation_sys_priv_error_info_subid_v(error_info), pri_ringstation_sys_priv_error_info_priv_level_v(error_info), error_code); - gp10b_priv_ring_decode_error_code(g, error_code); + if (g->ops.priv_ring.decode_error_code) + g->ops.priv_ring.decode_error_code(g, error_code); } if (status1) { @@ -166,7 +167,9 @@ void gp10b_priv_ring_isr(struct gk20a *g) pri_ringstation_gpc_gpc0_priv_error_info_priv_level_v(error_info), error_code); - gp10b_priv_ring_decode_error_code(g, error_code); + if (g->ops.priv_ring.decode_error_code) + g->ops.priv_ring.decode_error_code(g, + error_code); status1 = status1 & (~(BIT(gpc))); if (!status1) diff --git a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.h b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.h index 5b004a58..dd418e5b 100644 --- a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.h @@ -1,7 +1,7 @@ /* * GP10B PRIV ringmaster * - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,5 +27,7 @@ struct gk20a; void gp10b_priv_ring_isr(struct gk20a *g); +void gp10b_priv_ring_decode_error_code(struct gk20a *g, + u32 error_code); #endif /*__PRIV_RING_GP10B_H__*/ diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index b0caf9a6..be9afd8b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -734,6 +734,7 @@ static const struct gpu_ops gv100_ops = { }, .priv_ring = { .isr = gp10b_priv_ring_isr, + .decode_error_code = gp10b_priv_ring_decode_error_code, }, .nvlink = { .discover_ioctrl = gv100_nvlink_discover_ioctrl, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index ee3fc3de..2d4f82d1 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -711,6 +711,7 @@ static const struct gpu_ops gv11b_ops = { }, .priv_ring = { .isr = gp10b_priv_ring_isr, + .decode_error_code = gp10b_priv_ring_decode_error_code, }, .fuse = { .check_priv_security = gp10b_fuse_check_priv_security, -- cgit v1.2.2