diff options
author | Richard Zhao <rizhao@nvidia.com> | 2017-10-30 01:30:04 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-01 22:06:45 -0400 |
commit | 5eedf06bf56489bc559a08347f60a7680ccd6897 (patch) | |
tree | 1fe1e6eb8ead4c55166c1607e599d548093a2f40 /drivers | |
parent | 88ee812d56333375f7ae44e28b483c1a161d75da (diff) |
gpu: nvgpu: vgpu: set mmu error for all channels of a tsg at once
In the current code, vgpu sets the error notifier only for the reporting
channel but aborts the whole TSG. When the TSG is aborted, all channels of
the TSG are supposed to have had their error notifiers set. Set it for all
channels once any of the channels gets an MMU fault.
For now, the RM server still reports a TSG MMU fault once per channel.
We may optimize this in the future.
Jira VFND-3798
Change-Id: I6deaca55e7420899af8eabec72ad888d2726ad3c
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1588098
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 28 |
1 files changed, 26 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 2874e256..121a52f1 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -736,7 +736,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch, | |||
736 | return err ? err : msg.ret; | 736 | return err ? err : msg.ret; |
737 | } | 737 | } |
738 | 738 | ||
739 | static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, | 739 | static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g, |
740 | struct channel_gk20a *ch) | 740 | struct channel_gk20a *ch) |
741 | { | 741 | { |
742 | nvgpu_mutex_acquire(&ch->error_notifier_mutex); | 742 | nvgpu_mutex_acquire(&ch->error_notifier_mutex); |
@@ -761,6 +761,30 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g, | |||
761 | nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); | 761 | nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); |
762 | } | 762 | } |
763 | 763 | ||
764 | static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g, | ||
765 | struct channel_gk20a *ch) | ||
766 | { | ||
767 | struct tsg_gk20a *tsg = NULL; | ||
768 | struct channel_gk20a *ch_tsg = NULL; | ||
769 | |||
770 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
771 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
772 | |||
773 | nvgpu_rwsem_down_read(&tsg->ch_list_lock); | ||
774 | |||
775 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | ||
776 | if (gk20a_channel_get(ch_tsg)) { | ||
777 | vgpu_fifo_set_ctx_mmu_error_ch(g, ch_tsg); | ||
778 | gk20a_channel_put(ch_tsg); | ||
779 | } | ||
780 | } | ||
781 | |||
782 | nvgpu_rwsem_up_read(&tsg->ch_list_lock); | ||
783 | } else { | ||
784 | vgpu_fifo_set_ctx_mmu_error_ch(g, ch); | ||
785 | } | ||
786 | } | ||
787 | |||
764 | int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) | 788 | int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) |
765 | { | 789 | { |
766 | struct fifo_gk20a *f = &g->fifo; | 790 | struct fifo_gk20a *f = &g->fifo; |
@@ -784,7 +808,7 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) | |||
784 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); | 808 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); |
785 | break; | 809 | break; |
786 | case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: | 810 | case TEGRA_VGPU_FIFO_INTR_MMU_FAULT: |
787 | vgpu_fifo_set_ctx_mmu_error(g, ch); | 811 | vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch); |
788 | gk20a_channel_abort(ch, false); | 812 | gk20a_channel_abort(ch, false); |
789 | break; | 813 | break; |
790 | default: | 814 | default: |