summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2015-10-13 16:09:07 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2015-10-20 16:58:48 -0400
commit: fd97ed15d6a5285f59ca37dba8086f7a2780d3af (patch)
tree: 35319f0fa92ad9403ee9668e715f6c8df3c57469 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent: c3b595178e9cffb617f104fa7880d704fceebd69 (diff)
gpu: nvgpu: Trigger recovery on HWW errors
Trigger recovery on DS and MEMFMT HWW errors, and write an error line to UART for each HWW error. Also capture the channel id before clearing the exception.

Bug 1683059

Change-Id: Ia00d88a76371a4bd7e047915dde0bf0d4b84bc10
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/816983
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 24
1 files changed, 16 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ea06bd20..8c667ae7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5414,12 +5414,14 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event)
5414 5414
5415int gk20a_gr_isr(struct gk20a *g) 5415int gk20a_gr_isr(struct gk20a *g)
5416{ 5416{
5417 struct device *dev = dev_from_gk20a(g);
5417 struct gr_isr_data isr_data; 5418 struct gr_isr_data isr_data;
5418 u32 grfifo_ctl; 5419 u32 grfifo_ctl;
5419 u32 obj_table; 5420 u32 obj_table;
5420 int need_reset = 0; 5421 int need_reset = 0;
5421 u32 gr_intr = gk20a_readl(g, gr_intr_r()); 5422 u32 gr_intr = gk20a_readl(g, gr_intr_r());
5422 struct channel_gk20a *ch = NULL; 5423 struct channel_gk20a *ch = NULL;
5424 int tsgid = NVGPU_INVALID_TSG_ID;
5423 5425
5424 gk20a_dbg_fn(""); 5426 gk20a_dbg_fn("");
5425 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); 5427 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -5445,7 +5447,7 @@ int gk20a_gr_isr(struct gk20a *g)
5445 gr_fe_object_table_r(isr_data.sub_chan)) : 0; 5447 gr_fe_object_table_r(isr_data.sub_chan)) : 0;
5446 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); 5448 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
5447 5449
5448 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL); 5450 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, &tsgid);
5449 if (!ch) { 5451 if (!ch) {
5450 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", 5452 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
5451 isr_data.curr_ctx); 5453 isr_data.curr_ctx);
@@ -5538,16 +5540,16 @@ int gk20a_gr_isr(struct gk20a *g)
5538 5540
5539 if (exception & gr_exception_fe_m()) { 5541 if (exception & gr_exception_fe_m()) {
5540 u32 fe = gk20a_readl(g, gr_fe_hww_esr_r()); 5542 u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
5541 gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe); 5543 gk20a_err(dev, "fe warning %08x", fe);
5542 gk20a_writel(g, gr_fe_hww_esr_r(), fe); 5544 gk20a_writel(g, gr_fe_hww_esr_r(), fe);
5543 need_reset |= -EFAULT; 5545 need_reset |= -EFAULT;
5544 } 5546 }
5545 5547
5546 if (exception & gr_exception_memfmt_m()) { 5548 if (exception & gr_exception_memfmt_m()) {
5547 u32 memfmt = gk20a_readl(g, gr_memfmt_hww_esr_r()); 5549 u32 memfmt = gk20a_readl(g, gr_memfmt_hww_esr_r());
5548 gk20a_dbg(gpu_dbg_intr, "memfmt exception %08x\n", 5550 gk20a_err(dev, "memfmt exception %08x", memfmt);
5549 memfmt);
5550 gk20a_writel(g, gr_memfmt_hww_esr_r(), memfmt); 5551 gk20a_writel(g, gr_memfmt_hww_esr_r(), memfmt);
5552 need_reset |= -EFAULT;
5551 } 5553 }
5552 5554
5553 /* check if a gpc exception has occurred */ 5555 /* check if a gpc exception has occurred */
@@ -5582,17 +5584,23 @@ int gk20a_gr_isr(struct gk20a *g)
5582 5584
5583 if (exception & gr_exception_ds_m()) { 5585 if (exception & gr_exception_ds_m()) {
5584 u32 ds = gk20a_readl(g, gr_ds_hww_esr_r()); 5586 u32 ds = gk20a_readl(g, gr_ds_hww_esr_r());
5585 gk20a_dbg(gpu_dbg_intr, "ds exception %08x\n", ds); 5587 gk20a_err(dev, "ds exception %08x", ds);
5586 gk20a_writel(g, gr_ds_hww_esr_r(), ds); 5588 gk20a_writel(g, gr_ds_hww_esr_r(), ds);
5589 need_reset |= -EFAULT;
5587 } 5590 }
5588 5591
5589 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); 5592 gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f());
5590 gr_intr &= ~gr_intr_exception_pending_f(); 5593 gr_intr &= ~gr_intr_exception_pending_f();
5591 } 5594 }
5592 5595
5593 if (need_reset) 5596 if (need_reset) {
5594 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), 5597 if (tsgid != NVGPU_INVALID_TSG_ID)
5595 ~(u32)0, false, false, true); 5598 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
5599 tsgid, true, true, true);
5600 else
5601 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
5602 ch->hw_chid, false, true, true);
5603 }
5596 5604
5597clean_up: 5605clean_up:
5598 if (gr_intr && !ch) { 5606 if (gr_intr && !ch) {