Diffstat (limited to 'drivers')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.h |  2
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fb_gv11b.c      | 25
-rw-r--r--   drivers/gpu/nvgpu/gv11b/fifo_gv11b.c    |  3
-rw-r--r--   drivers/gpu/nvgpu/gv11b/gr_gv11b.c      | 35
5 files changed, 53 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 551e8b04..4e6837ef 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -684,6 +684,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->subctx_id = 0;
 	ch->runqueue_sel = 0;
 
+	ch->mmu_nack_handled = false;
+
 	/* The channel is *not* runnable at this point. It still needs to have
 	 * an address space bound and allocate a gpfifo and grctx. */
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f95184be..aa37db62 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -295,6 +295,8 @@ struct channel_gk20a {
 
 	/* Any operating system specific data. */
 	void *os_priv;
+
+	bool mmu_nack_handled;
 };
 
 static inline struct channel_gk20a *
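
The two hunks above introduce a per-channel mmu_nack_handled flag, cleared when a channel is opened. As a reading aid only (not part of the patch), the sketch below outlines how the rest of the series appears to use the flag to coordinate the SM mmu-nack path and the mmu-fault path; the struct and function names here are placeholders, not nvgpu APIs.

	#include <errno.h>	/* EFAULT */
	#include <stdbool.h>

	/* Editor's sketch, illustrative only. */
	struct ch_sketch {
		bool mmu_nack_handled;	/* false when the channel is opened */
	};

	/* mmu nack serviced first (gr_gv11b.c hunk): mark the channel and
	 * force recovery by reporting an error to the caller. */
	static int nack_path(struct ch_sketch *ch)
	{
		if (!ch->mmu_nack_handled) {
			ch->mmu_nack_handled = true;
			return -EFAULT;		/* caller triggers recovery */
		}
		return 0;
	}

	/* mmu fault serviced second (fb_gv11b.c hunk): recovery already ran,
	 * so clear the flag and skip a second teardown. */
	static void fault_path(struct ch_sketch *ch)
	{
		if (ch->mmu_nack_handled) {
			ch->mmu_nack_handled = false;	/* re-arm the flag */
			return;
		}
		ch->mmu_nack_handled = true;	/* a nack may still follow */
	}
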
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index ce8f5669..bba7e66c 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -935,12 +935,37 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 				id = mmfault->chid;
 				id_type = ID_TYPE_CHANNEL;
 			}
+			if (mmfault->refch->mmu_nack_handled) {
+				/* We have already recovered for the same
+				 * context, so skip doing another recovery.
+				 */
+				mmfault->refch->mmu_nack_handled = false;
+				/*
+				 * The recovery path can be entered twice for
+				 * the same error in case of an mmu nack. If
+				 * the mmu nack interrupt is handled before the
+				 * mmu fault, the channel reference is raised
+				 * to keep userspace from closing the channel.
+				 * Drop that extra reference here.
+				 */
+				gk20a_channel_put(mmfault->refch);
+				/* refch in mmfault is assigned at the time of
+				 * copying fault info from the snap register or
+				 * the bar2 fault buffer.
+				 */
+				gk20a_channel_put(mmfault->refch);
+				return;
+			}
 		} else {
 			id_type = ID_TYPE_UNKNOWN;
 		}
 		if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
 			act_eng_bitmask = BIT(mmfault->faulted_engine);
 
+		/* Indicate that recovery is handled if the mmu fault is the
+		 * result of an mmu nack.
+		 */
+		mmfault->refch->mmu_nack_handled = true;
 		g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
 			id, id_type, RC_TYPE_MMU_FAULT, mmfault);
 	} else {
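
The added block above drops two references on mmfault->refch when the second pass through recovery is skipped. As a reading aid only (not patch code), the reference balance appears to be the following, with a simplified ref_put() standing in for gk20a_channel_put():

	#include <stdbool.h>

	/* Editor's sketch of the reference balance in the skip branch;
	 * refcount stands in for the channel's actual reference counting. */
	struct ref_sketch { int refcount; bool mmu_nack_handled; };

	static void ref_put(struct ref_sketch *ch) { ch->refcount--; }

	static void skip_second_recovery(struct ref_sketch *refch)
	{
		if (!refch->mmu_nack_handled)
			return;			/* normal recovery proceeds */

		refch->mmu_nack_handled = false;
		ref_put(refch);	/* balances the get taken in the nack path */
		ref_put(refch);	/* balances the ref taken when fault info was
				 * copied from the snap reg / bar2 buffer */
	}
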
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 84d63b91..9c64675f 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -596,6 +596,9 @@ void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
 	nvgpu_log(g, gpu_dbg_intr, "reset faulted pbdma:0x%x eng:0x%x",
 			faulted_pbdma, faulted_engine);
 
+	if (!refch)
+		return;
+
 	if (gk20a_is_channel_marked_as_tsg(refch)) {
 		tsg = &g->fifo.tsg[refch->tsgid];
 		if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 694ff8ad..aed45ceb 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2093,23 +2093,30 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
 		u32 warp_esr_error,
 		struct channel_gk20a *fault_ch)
 {
-	struct tsg_gk20a *tsg;
 	u32 offset;
+	int err = 0;
 
+	fault_ch = gk20a_channel_get(fault_ch);
 	if (fault_ch) {
-		tsg = &g->fifo.tsg[fault_ch->tsgid];
-
-		/*
-		 * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK
-		 * to SM. So MMU_FAULT handling path will take care of
-		 * triggering RC recovery
-		 *
-		 * In MMU_NACK handling path, we just set the error notifier
-		 * and clear the interrupt so that the User Space sees the error
-		 * as soon as semaphores are released by SM
-		 */
-		gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+		if (!fault_ch->mmu_nack_handled) {
+			/* Recovery is not done yet for this channel, implying
+			 * the mmu nack interrupt is serviced before the mmu
+			 * fault. Force recovery by returning an error, and
+			 * indicate that a second recovery should be skipped.
+			 */
+			fault_ch->mmu_nack_handled = true;
+			err = -EFAULT;
+		}
 	}
+	/* else the mmu fault is serviced first and the channel is closed */
+
+	/* Do not release the channel reference, since we do not want
+	 * userspace to close this channel during recovery. Otherwise the mmu
+	 * fault handler would enter the recovery path even for an invalid
+	 * channel; it checks the teardown flag explicitly instead.
+	 */
+	if (!err)
+		gk20a_channel_put(fault_ch);
 
 	/* clear interrupt */
 	offset = gk20a_gr_gpc_offset(g, gpc) +
@@ -2122,7 +2129,7 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
 		"ESR %s(0x%x)",
 		"MMU NACK ERROR",
 		warp_esr_error);
-	return 0;
+	return err;
 }
 
 static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
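
When the nack handler above forces recovery (err is -EFAULT), it deliberately keeps the reference taken by gk20a_channel_get(); that reference is only dropped later, in the skip branch added to fb_gv11b.c. A minimal sketch of this put-on-success / keep-on-error pattern, with placeholder names (an editor's illustration, not patch code):

	#include <errno.h>
	#include <stdbool.h>

	/* ch_get()/ch_put() are trivial stand-ins for channel refcounting. */
	struct ch { int refcount; };

	static struct ch *ch_get(struct ch *c)
	{
		if (c)
			c->refcount++;
		return c;
	}

	static void ch_put(struct ch *c) { c->refcount--; }

	static int nack_handler_sketch(struct ch *fault_ch, bool recovery_needed)
	{
		int err = 0;

		fault_ch = ch_get(fault_ch);
		if (fault_ch && recovery_needed)
			err = -EFAULT;		/* caller triggers recovery */

		/* Keep the reference on error so userspace cannot close the
		 * channel before the mmu fault handler sees the flag; that
		 * handler drops the extra reference when it skips recovery. */
		if (!err && fault_ch)
			ch_put(fault_ch);

		return err;
	}
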