diff options
author | Seema Khowala <seemaj@nvidia.com> | 2018-02-22 16:00:25 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-05-04 00:43:06 -0400 |
commit | c9463fdbb31324cc8eaa7fbed69f8d4b98ef38b5 (patch) | |
tree | c7b620761aff85eea584ed63573548fcd7d679c4 /drivers/gpu | |
parent | bf0379997799e7f83514e974cd02aaaab85a4101 (diff) |
gpu: nvgpu: add rc_type input param to gk20a_fifo_recover
Add the following rc_types, to be passed to gk20a_fifo_recover:
MMU_FAULT
PBDMA_FAULT
GR_FAULT
PREEMPT_TIMEOUT
CTXSW_TIMEOUT
RUNLIST_UPDATE_TIMEOUT
FORCE_RESET
SCHED_ERR
This is useful for knowing what triggered a recovery.
Bug 2065990
Change-Id: I202268c5f237be2180b438e8ba027fce684967b6
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1662619
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 49 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 17 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 6 |
4 files changed, 53 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 48982a04..fc63dcbe 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -1817,7 +1817,7 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) | |||
1817 | return engines; | 1817 | return engines; |
1818 | } | 1818 | } |
1819 | 1819 | ||
1820 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose) | 1820 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose, int rc_type) |
1821 | { | 1821 | { |
1822 | u32 engines; | 1822 | u32 engines; |
1823 | 1823 | ||
@@ -1829,7 +1829,8 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose) | |||
1829 | engines = gk20a_fifo_engines_on_id(g, chid, false); | 1829 | engines = gk20a_fifo_engines_on_id(g, chid, false); |
1830 | 1830 | ||
1831 | if (engines) | 1831 | if (engines) |
1832 | gk20a_fifo_recover(g, engines, chid, false, true, verbose); | 1832 | gk20a_fifo_recover(g, engines, chid, false, true, verbose, |
1833 | rc_type); | ||
1833 | else { | 1834 | else { |
1834 | struct channel_gk20a *ch = &g->fifo.channel[chid]; | 1835 | struct channel_gk20a *ch = &g->fifo.channel[chid]; |
1835 | 1836 | ||
@@ -1847,7 +1848,8 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose) | |||
1847 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 1848 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
1848 | } | 1849 | } |
1849 | 1850 | ||
1850 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) | 1851 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose, |
1852 | int rc_type) | ||
1851 | { | 1853 | { |
1852 | u32 engines; | 1854 | u32 engines; |
1853 | 1855 | ||
@@ -1859,7 +1861,8 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) | |||
1859 | engines = gk20a_fifo_engines_on_id(g, tsgid, true); | 1861 | engines = gk20a_fifo_engines_on_id(g, tsgid, true); |
1860 | 1862 | ||
1861 | if (engines) | 1863 | if (engines) |
1862 | gk20a_fifo_recover(g, engines, tsgid, true, true, verbose); | 1864 | gk20a_fifo_recover(g, engines, tsgid, true, true, verbose, |
1865 | rc_type); | ||
1863 | else { | 1866 | else { |
1864 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; | 1867 | struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; |
1865 | 1868 | ||
@@ -1956,7 +1959,7 @@ void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids, | |||
1956 | 1959 | ||
1957 | void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | 1960 | void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, |
1958 | u32 hw_id, bool id_is_tsg, | 1961 | u32 hw_id, bool id_is_tsg, |
1959 | bool id_is_known, bool verbose) | 1962 | bool id_is_known, bool verbose, int rc_type) |
1960 | { | 1963 | { |
1961 | unsigned int id_type; | 1964 | unsigned int id_type; |
1962 | 1965 | ||
@@ -1972,7 +1975,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | |||
1972 | id_type = ID_TYPE_UNKNOWN; | 1975 | id_type = ID_TYPE_UNKNOWN; |
1973 | 1976 | ||
1974 | g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type, | 1977 | g->ops.fifo.teardown_ch_tsg(g, __engine_ids, hw_id, id_type, |
1975 | RC_TYPE_NORMAL, NULL); | 1978 | rc_type, NULL); |
1976 | } | 1979 | } |
1977 | 1980 | ||
1978 | /* force reset channel and tsg (if it's part of one) */ | 1981 | /* force reset channel and tsg (if it's part of one) */ |
@@ -1998,10 +2001,12 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, | |||
1998 | } | 2001 | } |
1999 | 2002 | ||
2000 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | 2003 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); |
2001 | gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); | 2004 | gk20a_fifo_recover_tsg(g, ch->tsgid, verbose, |
2005 | RC_TYPE_FORCE_RESET); | ||
2002 | } else { | 2006 | } else { |
2003 | g->ops.fifo.set_error_notifier(ch, err_code); | 2007 | g->ops.fifo.set_error_notifier(ch, err_code); |
2004 | gk20a_fifo_recover_ch(g, ch->chid, verbose); | 2008 | gk20a_fifo_recover_ch(g, ch->chid, verbose, |
2009 | RC_TYPE_FORCE_RESET); | ||
2005 | } | 2010 | } |
2006 | 2011 | ||
2007 | return 0; | 2012 | return 0; |
@@ -2288,7 +2293,8 @@ bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
2288 | */ | 2293 | */ |
2289 | gk20a_channel_timeout_restart_all_channels(g); | 2294 | gk20a_channel_timeout_restart_all_channels(g); |
2290 | gk20a_fifo_recover(g, BIT(engine_id), id, | 2295 | gk20a_fifo_recover(g, BIT(engine_id), id, |
2291 | is_tsg, true, verbose); | 2296 | is_tsg, true, verbose, |
2297 | RC_TYPE_CTXSW_TIMEOUT); | ||
2292 | } else { | 2298 | } else { |
2293 | gk20a_dbg_info( | 2299 | gk20a_dbg_info( |
2294 | "fifo is waiting for ctx switch for %d ms, " | 2300 | "fifo is waiting for ctx switch for %d ms, " |
@@ -2542,7 +2548,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, | |||
2542 | 2548 | ||
2543 | if (gk20a_channel_get(ch)) { | 2549 | if (gk20a_channel_get(ch)) { |
2544 | g->ops.fifo.set_error_notifier(ch, error_notifier); | 2550 | g->ops.fifo.set_error_notifier(ch, error_notifier); |
2545 | gk20a_fifo_recover_ch(g, id, true); | 2551 | gk20a_fifo_recover_ch(g, id, true, RC_TYPE_PBDMA_FAULT); |
2546 | gk20a_channel_put(ch); | 2552 | gk20a_channel_put(ch); |
2547 | } | 2553 | } |
2548 | } else if (fifo_pbdma_status_id_type_v(status) | 2554 | } else if (fifo_pbdma_status_id_type_v(status) |
@@ -2560,7 +2566,7 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, | |||
2560 | } | 2566 | } |
2561 | } | 2567 | } |
2562 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | 2568 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); |
2563 | gk20a_fifo_recover_tsg(g, id, true); | 2569 | gk20a_fifo_recover_tsg(g, id, true, RC_TYPE_PBDMA_FAULT); |
2564 | } | 2570 | } |
2565 | } | 2571 | } |
2566 | 2572 | ||
@@ -2578,8 +2584,10 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f, | |||
2578 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, | 2584 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, |
2579 | "pbdma id %d intr_0 0x%08x pending", | 2585 | "pbdma id %d intr_0 0x%08x pending", |
2580 | pbdma_id, pbdma_intr_0); | 2586 | pbdma_id, pbdma_intr_0); |
2581 | rc_type = g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, | 2587 | |
2582 | pbdma_intr_0, &handled, &error_notifier); | 2588 | if (g->ops.fifo.handle_pbdma_intr_0(g, pbdma_id, pbdma_intr_0, |
2589 | &handled, &error_notifier) != RC_TYPE_NO_RC) | ||
2590 | rc_type = RC_TYPE_PBDMA_FAULT; | ||
2583 | gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0); | 2591 | gk20a_writel(g, pbdma_intr_0_r(pbdma_id), pbdma_intr_0); |
2584 | } | 2592 | } |
2585 | 2593 | ||
@@ -2587,8 +2595,10 @@ u32 gk20a_fifo_handle_pbdma_intr(struct gk20a *g, struct fifo_gk20a *f, | |||
2587 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, | 2595 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, |
2588 | "pbdma id %d intr_1 0x%08x pending", | 2596 | "pbdma id %d intr_1 0x%08x pending", |
2589 | pbdma_id, pbdma_intr_1); | 2597 | pbdma_id, pbdma_intr_1); |
2590 | rc_type = g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, | 2598 | |
2591 | pbdma_intr_1, &handled, &error_notifier); | 2599 | if (g->ops.fifo.handle_pbdma_intr_1(g, pbdma_id, pbdma_intr_1, |
2600 | &handled, &error_notifier) != RC_TYPE_NO_RC) | ||
2601 | rc_type = RC_TYPE_PBDMA_FAULT; | ||
2592 | gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1); | 2602 | gk20a_writel(g, pbdma_intr_1_r(pbdma_id), pbdma_intr_1); |
2593 | } | 2603 | } |
2594 | 2604 | ||
@@ -2721,7 +2731,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, | |||
2721 | gk20a_channel_put(ch); | 2731 | gk20a_channel_put(ch); |
2722 | } | 2732 | } |
2723 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | 2733 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); |
2724 | gk20a_fifo_recover_tsg(g, id, true); | 2734 | gk20a_fifo_recover_tsg(g, id, true, |
2735 | RC_TYPE_PREEMPT_TIMEOUT); | ||
2725 | } else { | 2736 | } else { |
2726 | struct channel_gk20a *ch = &g->fifo.channel[id]; | 2737 | struct channel_gk20a *ch = &g->fifo.channel[id]; |
2727 | 2738 | ||
@@ -2731,7 +2742,8 @@ void __locked_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, | |||
2731 | if (gk20a_channel_get(ch)) { | 2742 | if (gk20a_channel_get(ch)) { |
2732 | g->ops.fifo.set_error_notifier(ch, | 2743 | g->ops.fifo.set_error_notifier(ch, |
2733 | NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); | 2744 | NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); |
2734 | gk20a_fifo_recover_ch(g, id, true); | 2745 | gk20a_fifo_recover_ch(g, id, true, |
2746 | RC_TYPE_PREEMPT_TIMEOUT); | ||
2735 | gk20a_channel_put(ch); | 2747 | gk20a_channel_put(ch); |
2736 | } | 2748 | } |
2737 | } | 2749 | } |
@@ -3024,7 +3036,8 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) | |||
3024 | } | 3036 | } |
3025 | 3037 | ||
3026 | if (engines) | 3038 | if (engines) |
3027 | gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true); | 3039 | gk20a_fifo_recover(g, engines, ~(u32)0, false, false, true, |
3040 | RC_TYPE_RUNLIST_UPDATE_TIMEOUT); | ||
3028 | } | 3041 | } |
3029 | 3042 | ||
3030 | int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) | 3043 | int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index c6d34945..c4f7f8ac 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -57,10 +57,15 @@ enum { | |||
57 | 57 | ||
58 | #define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000 | 58 | #define GRFIFO_TIMEOUT_CHECK_PERIOD_US 100000 |
59 | 59 | ||
60 | #define RC_TYPE_NORMAL 0 | 60 | #define RC_TYPE_NO_RC 0 |
61 | #define RC_TYPE_MMU_FAULT 1 | 61 | #define RC_TYPE_MMU_FAULT 1 |
62 | #define RC_TYPE_PBDMA_FAULT 2 | 62 | #define RC_TYPE_PBDMA_FAULT 2 |
63 | #define RC_TYPE_NO_RC 0xff | 63 | #define RC_TYPE_GR_FAULT 3 |
64 | #define RC_TYPE_PREEMPT_TIMEOUT 4 | ||
65 | #define RC_TYPE_CTXSW_TIMEOUT 5 | ||
66 | #define RC_TYPE_RUNLIST_UPDATE_TIMEOUT 6 | ||
67 | #define RC_TYPE_FORCE_RESET 7 | ||
68 | #define RC_TYPE_SCHED_ERR 8 | ||
64 | 69 | ||
65 | #define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL | 70 | #define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL |
66 | #define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL | 71 | #define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL |
@@ -256,9 +261,11 @@ void gk20a_fifo_recover(struct gk20a *g, | |||
256 | u32 engine_ids, /* if zero, will be queried from HW */ | 261 | u32 engine_ids, /* if zero, will be queried from HW */ |
257 | u32 hw_id, /* if ~0, will be queried from HW */ | 262 | u32 hw_id, /* if ~0, will be queried from HW */ |
258 | bool hw_id_is_tsg, /* ignored if hw_id == ~0 */ | 263 | bool hw_id_is_tsg, /* ignored if hw_id == ~0 */ |
259 | bool id_is_known, bool verbose); | 264 | bool id_is_known, bool verbose, int rc_type); |
260 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose); | 265 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose, |
261 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); | 266 | int rc_type); |
267 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose, | ||
268 | int rc_type); | ||
262 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, | 269 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, |
263 | u32 err_code, bool verbose); | 270 | u32 err_code, bool verbose); |
264 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); | 271 | void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d26d8a93..86111321 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -6088,13 +6088,16 @@ int gk20a_gr_isr(struct gk20a *g) | |||
6088 | if (need_reset) { | 6088 | if (need_reset) { |
6089 | if (tsgid != NVGPU_INVALID_TSG_ID) | 6089 | if (tsgid != NVGPU_INVALID_TSG_ID) |
6090 | gk20a_fifo_recover(g, gr_engine_id, | 6090 | gk20a_fifo_recover(g, gr_engine_id, |
6091 | tsgid, true, true, true); | 6091 | tsgid, true, true, true, |
6092 | RC_TYPE_GR_FAULT); | ||
6092 | else if (ch) | 6093 | else if (ch) |
6093 | gk20a_fifo_recover(g, gr_engine_id, | 6094 | gk20a_fifo_recover(g, gr_engine_id, |
6094 | ch->chid, false, true, true); | 6095 | ch->chid, false, true, true, |
6096 | RC_TYPE_GR_FAULT); | ||
6095 | else | 6097 | else |
6096 | gk20a_fifo_recover(g, gr_engine_id, | 6098 | gk20a_fifo_recover(g, gr_engine_id, |
6097 | 0, false, false, true); | 6099 | 0, false, false, true, |
6100 | RC_TYPE_GR_FAULT); | ||
6098 | } | 6101 | } |
6099 | 6102 | ||
6100 | if (gr_intr && !ch) { | 6103 | if (gr_intr && !ch) { |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index d8976608..11b393e5 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -1306,7 +1306,8 @@ bool gv11b_fifo_handle_sched_error(struct gk20a *g) | |||
1306 | 1306 | ||
1307 | if (sched_error == SCHED_ERROR_CODE_BAD_TSG ) { | 1307 | if (sched_error == SCHED_ERROR_CODE_BAD_TSG ) { |
1308 | /* id is unknown, preempt all runlists and do recovery */ | 1308 | /* id is unknown, preempt all runlists and do recovery */ |
1309 | gk20a_fifo_recover(g, 0, 0, false, false, false); | 1309 | gk20a_fifo_recover(g, 0, 0, false, false, false, |
1310 | RC_TYPE_SCHED_ERR); | ||
1310 | } | 1311 | } |
1311 | 1312 | ||
1312 | return false; | 1313 | return false; |
@@ -1465,7 +1466,8 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g, u32 fifo_intr) | |||
1465 | /* Cancel all channels' timeout */ | 1466 | /* Cancel all channels' timeout */ |
1466 | gk20a_channel_timeout_restart_all_channels(g); | 1467 | gk20a_channel_timeout_restart_all_channels(g); |
1467 | gk20a_fifo_recover(g, BIT(active_eng_id), tsgid, | 1468 | gk20a_fifo_recover(g, BIT(active_eng_id), tsgid, |
1468 | true, true, verbose); | 1469 | true, true, verbose, |
1470 | RC_TYPE_CTXSW_TIMEOUT); | ||
1469 | } else { | 1471 | } else { |
1470 | gk20a_dbg_info( | 1472 | gk20a_dbg_info( |
1471 | "fifo is waiting for ctx switch: " | 1473 | "fifo is waiting for ctx switch: " |