summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2018-02-22 16:00:25 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-05-04 00:43:06 -0400
commitc9463fdbb31324cc8eaa7fbed69f8d4b98ef38b5 (patch)
treec7b620761aff85eea584ed63573548fcd7d679c4
parentbf0379997799e7f83514e974cd02aaaab85a4101 (diff)
gpu: nvgpu: add rc_type i/p param to gk20a_fifo_recover
Add the following rc_types to be passed to gk20a_fifo_recover: MMU_FAULT, PBDMA_FAULT, GR_FAULT, PREEMPT_TIMEOUT, CTXSW_TIMEOUT, RUNLIST_UPDATE_TIMEOUT, FORCE_RESET, SCHED_ERR. This makes it possible to know what triggered recovery. Bug 2065990 Change-Id: I202268c5f237be2180b438e8ba027fce684967b6 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1662619 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c49
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.h17
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c9
-rw-r--r--drivers/gpu/nvgpu/gv11b/fifo_gv11b.c6
4 files changed, 53 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 48982a04..fc63dcbe 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1817,7 +1817,7 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1817 return engines; 1817 return engines;
1818} 1818}
1819 1819
1820void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose) 1820void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose, int rc_type)
1821{ 1821{
1822 u32 engines; 1822 u32 engines;
1823 1823
@@ -1829,7 +1829,8 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose)
1829 engines = gk20a_fifo_engines_on_id(g, chid, false); 1829 engines = gk20a_fifo_engines_on_id(g, chid, false);
1830 1830
1831 if (engines) 1831 if (engines)
1832 gk20a_fifo_recover(g, engines, chid, false, true, verbose); 1832 gk20a_fifo_recover(g, engines, chid, false, true, verbose,
1833 rc_type);
1833 else { 1834 else {
1834 struct channel_gk20a *ch = &g->fifo.channel[chid]; 1835 struct channel_gk20a *ch = &g->fifo.channel[chid];
1835 1836
@@ -1847,7 +1848,8 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose)
1847 nvgpu_mutex_release(&g->dbg_sessions_lock); 1848 nvgpu_mutex_release(&g->dbg_sessions_lock);
1848} 1849}
1849 1850
1850void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) 1851void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose,
1852 int rc_type)
1851{ 1853{
1852 u32 engines; 1854 u32 engines;
1853 1855
@@ -1859,7 +1861,8 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1859 engines = gk20a_fifo_engines_on_id(g, tsgid, true); 1861 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
1860 1862
1861 if (engines) 1863 if (engines)
1862 gk20a_fifo_recover(g, engines, tsgid, true, true, verbose); 1864 gk20a_fifo_recover(g, engines, tsgid, true, true, verbose,
1865 rc_type);
1863 else { 1866 else {
1864 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; 1867 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
1865 1868
@@ -1956,7 +1959,7 @@ void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
1956 1959
1957void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, 1960void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1958 u32 hw_id, bool id_is_tsg, 1961 u32 hw_id, bool id_is_tsg,
1959 bool id_is_known, bool verbose) 1962 bool id_is_known, bool verbose, int rc_type)
1960{ 1963{
1961 unsigned int id_type; 1964 unsigned int id_type;
1962 1965
@@ -1972,7 +1975,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1972 id_type = ID_TYPE_UNKNOWN; 1975 id_type = ID_TYPE_UNKNOWN;
1973 1976
1974 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type, 1977 g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type,
1975 RC_TYPE_NORMAL, NULL); 1978 rc_type, NULL);
1976} 1979}
1977 1980
1978/* force reset channel and tsg (if it's part of one) */ 1981/* force reset channel and tsg (if it's part of one) */
@@ -1998,10 +2001,12 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
1998 } 2001 }
1999 2002
2000 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 2003 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2001 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); 2004 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose,
2005 RC_TYPE_FORCE_RESET);
2002 } else { 2006 } else {
2003 g->ops.fifo.set_error_notifier(ch, err_code); 2007 g->ops.fifo.set_error_notifier(ch, err_code);
2004 gk20a_fifo_recover_ch(g, ch->chid, verbose); 2008 gk20a_fifo_recover_ch(g, ch->chid, verbose,
2009 RC_TYPE_FORCE_RESET);
2005 } 2010 }
2006 2011
2007 return 0; 2012 return 0;
@@ -2288,7 +2293,8 @@ bool gk20a_fifo_handle_sched_error(struct gk20a *g)
2288 */ 2293 */
2289 gk20a_channel_timeout_restart_all_channels(g); 2294 gk20a_channel_timeout_restart_all_channels(g);
2290 gk20a_fifo_recover(g, BIT(engine_id), id, 2295 gk20a_fifo_recover(g, BIT(engine_id), id,
2291 is_tsg, true, verbose); 2296 is_tsg, true, verbose,
2297 RC_TYPE_CTXSW_TIMEOUT);
2292 } else { 2298 } else {
2293 gk20a_dbg_info( 2299 gk20a_dbg_info(
2294 "fifo is waiting for ctx switch for %d ms, " 2300 "fifo is waiting for ctx switch for %d ms, "
@@ -2542,7 +2548,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2542 2548
2543 if (gk20a_channel_get(ch)) { 2549 if (gk20a_channel_get(ch)) {
2544 g->ops.fifo.set_error_notifier(ch, error_notifier); 2550 g->ops.fifo.set_error_notifier(ch, error_notifier);
2545 gk20a_fifo_recover_ch(g, id, true); 2551 gk20a_fifo_recover_ch(g, id, true, RC_TYPE_PBDMA_FAULT);
2546 gk20a_channel_put(ch); 2552 gk20a_channel_put(ch);
2547 } 2553 }
2548 } else if (fifo_pbdma_status_id_type_v(status) 2554 } else if (fifo_pbdma_status_id_type_v(status)
@@ -2560,7 +2566,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
2560 } 2566 }
2561 } 2567 }
2562 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 2568 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2563 gk20a_fifo_recover_tsg(g, id, true); 2569 gk20a_fifo_recover_tsg(g, id, true, RC_TYPE_PBDMA_FAULT);
2564 } 2570 }
2565} 2571}
2566 2572
@@ -2578,8 +2584,10 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2578 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, 2584 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2579 "pbdma id %d intr_0 0x%08x pending", 2585 "pbdma id %d intr_0 0x%08x pending",
2580 pbdma_id, pbdma_intr_0); 2586 pbdma_id, pbdma_intr_0);
2581 rc_type = g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, 2587
2582 pbdma_intr_0, &handled, &error_notifier); 2588 if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0,
2589 &handled, &error_notifier) != RC_TYPE_NO_RC)
2590 rc_type = RC_TYPE_PBDMA_FAULT;
2583 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0); 2591 gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0);
2584 } 2592 }
2585 2593
@@ -2587,8 +2595,10 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f,
2587 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, 2595 nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
2588 "pbdma id %d intr_1 0x%08x pending", 2596 "pbdma id %d intr_1 0x%08x pending",
2589 pbdma_id, pbdma_intr_1); 2597 pbdma_id, pbdma_intr_1);
2590 rc_type = g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, 2598
2591 pbdma_intr_1, &handled, &error_notifier); 2599 if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1,
2600 &handled, &error_notifier) != RC_TYPE_NO_RC)
2601 rc_type = RC_TYPE_PBDMA_FAULT;
2592 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1); 2602 gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1);
2593 } 2603 }
2594 2604
@@ -2721,7 +2731,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
2721 gk20a_channel_put(ch); 2731 gk20a_channel_put(ch);
2722 } 2732 }
2723 nvgpu_rwsem_up_read(&tsg->ch_list_lock); 2733 nvgpu_rwsem_up_read(&tsg->ch_list_lock);
2724 gk20a_fifo_recover_tsg(g, id, true); 2734 gk20a_fifo_recover_tsg(g, id, true,
2735 RC_TYPE_PREEMPT_TIMEOUT);
2725 } else { 2736 } else {
2726 struct channel_gk20a *ch = &g->fifo.channel[id]; 2737 struct channel_gk20a *ch = &g->fifo.channel[id];
2727 2738
@@ -2731,7 +2742,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
2731 if (gk20a_channel_get(ch)) { 2742 if (gk20a_channel_get(ch)) {
2732 g->ops.fifo.set_error_notifier(ch, 2743 g->ops.fifo.set_error_notifier(ch,
2733 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); 2744 NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
2734 gk20a_fifo_recover_ch(g, id, true); 2745 gk20a_fifo_recover_ch(g, id, true,
2746 RC_TYPE_PREEMPT_TIMEOUT);
2735 gk20a_channel_put(ch); 2747 gk20a_channel_put(ch);
2736 } 2748 }
2737 } 2749 }
@@ -3024,7 +3036,8 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
3024 } 3036 }
3025 3037
3026 if (engines) 3038 if (engines)
3027 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true); 3039 gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true,
3040 RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
3028} 3041}
3029 3042
3030int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) 3043int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index c6d34945..c4f7f8ac 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -57,10 +57,15 @@ enum {
57 57
58#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000 58#define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000
59 59
60#define RC_TYPE_NORMAL 0 60#define RC_TYPE_NO_RC 0
61#define RC_TYPE_MMU_FAULT 1 61#define RC_TYPE_MMU_FAULT 1
62#define RC_TYPE_PBDMA_FAULT 2 62#define RC_TYPE_PBDMA_FAULT 2
63#define RC_TYPE_NO_RC 0xff 63#define RC_TYPE_GR_FAULT 3
64#define RC_TYPE_PREEMPT_TIMEOUT 4
65#define RC_TYPE_CTXSW_TIMEOUT 5
66#define RC_TYPE_RUNLIST_UPDATE_TIMEOUT 6
67#define RC_TYPE_FORCE_RESET 7
68#define RC_TYPE_SCHED_ERR 8
64 69
65#define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL 70#define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL
66#define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL 71#define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL
@@ -256,9 +261,11 @@ void gk20a_fifo_recover(struct gk20a *g,
256 u32 engine_ids, /* if zero, will be queried from HW */ 261 u32 engine_ids, /* if zero, will be queried from HW */
257 u32 hw_id, /* if ~0, will be queried from HW */ 262 u32 hw_id, /* if ~0, will be queried from HW */
258 bool hw_id_is_tsg, /* ignored if hw_id == ~0 */ 263 bool hw_id_is_tsg, /* ignored if hw_id == ~0 */
259 bool id_is_known, bool verbose); 264 bool id_is_known, bool verbose, int rc_type);
260void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose); 265void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose,
261void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); 266 int rc_type);
267void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose,
268 int rc_type);
262int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, 269int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
263 u32 err_code, bool verbose); 270 u32 err_code, bool verbose);
264void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); 271void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d26d8a93..86111321 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6088,13 +6088,16 @@ int gk20a_gr_isr(struct gk20a *g)
6088 if (need_reset) { 6088 if (need_reset) {
6089 if (tsgid != NVGPU_INVALID_TSG_ID) 6089 if (tsgid != NVGPU_INVALID_TSG_ID)
6090 gk20a_fifo_recover(g, gr_engine_id, 6090 gk20a_fifo_recover(g, gr_engine_id,
6091 tsgid, true, true, true); 6091 tsgid, true, true, true,
6092 RC_TYPE_GR_FAULT);
6092 else if (ch) 6093 else if (ch)
6093 gk20a_fifo_recover(g, gr_engine_id, 6094 gk20a_fifo_recover(g, gr_engine_id,
6094 ch->chid, false, true, true); 6095 ch->chid, false, true, true,
6096 RC_TYPE_GR_FAULT);
6095 else 6097 else
6096 gk20a_fifo_recover(g, gr_engine_id, 6098 gk20a_fifo_recover(g, gr_engine_id,
6097 0, false, false, true); 6099 0, false, false, true,
6100 RC_TYPE_GR_FAULT);
6098 } 6101 }
6099 6102
6100 if (gr_intr && !ch) { 6103 if (gr_intr && !ch) {
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index d8976608..11b393e5 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -1306,7 +1306,8 @@ bool gv11b_fifo_handle_sched_error(struct gk20a *g)
1306 1306
1307 if (sched_error == SCHED_ERROR_CODE_BAD_TSG ) { 1307 if (sched_error == SCHED_ERROR_CODE_BAD_TSG ) {
1308 /* id is unknown, preempt all runlists and do recovery */ 1308 /* id is unknown, preempt all runlists and do recovery */
1309 gk20a_fifo_recover(g, 0, 0, false, false, false); 1309 gk20a_fifo_recover(g, 0, 0, false, false, false,
1310 RC_TYPE_SCHED_ERR);
1310 } 1311 }
1311 1312
1312 return false; 1313 return false;
@@ -1465,7 +1466,8 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g, u32 fifo_intr)
1465 /* Cancel all channels' timeout */ 1466 /* Cancel all channels' timeout */
1466 gk20a_channel_timeout_restart_all_channels(g); 1467 gk20a_channel_timeout_restart_all_channels(g);
1467 gk20a_fifo_recover(g, BIT(active_eng_id), tsgid, 1468 gk20a_fifo_recover(g, BIT(active_eng_id), tsgid,
1468 true, true, verbose); 1469 true, true, verbose,
1470 RC_TYPE_CTXSW_TIMEOUT);
1469 } else { 1471 } else {
1470 gk20a_dbg_info( 1472 gk20a_dbg_info(
1471 "fifo is waiting for ctx switch: " 1473 "fifo is waiting for ctx switch: "