From 5eedf06bf56489bc559a08347f60a7680ccd6897 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Sun, 29 Oct 2017 22:30:04 -0700
Subject: gpu: nvgpu: vgpu: set mmu error for all channels of a tsg at once

In the current code, vgpu only sets the error notifier for the reporting
channel but aborts the whole tsg. When the tsg is aborted, all channels of
the tsg are supposed to have had their error notifiers set. Set it for all
channels of the tsg once any of them gets an MMU fault.

For now, the RM server still reports a tsg MMU fault once per channel
(i.e. num-of-channel times). We may optimize this in the future.

Jira VFND-3798

Change-Id: I6deaca55e7420899af8eabec72ad888d2726ad3c
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1588098
Reviewed-by: Aingara Paramakuru <aparamakuru@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/nvgpu/vgpu/fifo_vgpu.c')

diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 2874e256..121a52f1 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -736,7 +736,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
 	return err ? err : msg.ret;
 }
 
-static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,
+static void vgpu_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
 	nvgpu_mutex_acquire(&ch->error_notifier_mutex);
@@ -761,6 +761,30 @@ static void vgpu_fifo_set_ctx_mmu_error(struct gk20a *g,
 	nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
 }
 
+static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g,
+		struct channel_gk20a *ch)
+{
+	struct tsg_gk20a *tsg = NULL;
+	struct channel_gk20a *ch_tsg = NULL;
+	/* TSG channel: flag the channels on tsg->ch_list, else only ch. */
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		tsg = &g->fifo.tsg[ch->tsgid];
+		/* Read-lock ch_list_lock for the duration of the list walk. */
+		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+		/* Only channels that gk20a_channel_get() accepts are flagged. */
+		list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
+			if (gk20a_channel_get(ch_tsg)) {
+				vgpu_fifo_set_ctx_mmu_error_ch(g, ch_tsg);
+				gk20a_channel_put(ch_tsg);
+			}
+		}
+
+		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+	} else {
+		vgpu_fifo_set_ctx_mmu_error_ch(g, ch);
+	}
+}
+
 int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
 {
 	struct fifo_gk20a *f = &g->fifo;
@@ -784,7 +808,7 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
 					NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
 		break;
 	case TEGRA_VGPU_FIFO_INTR_MMU_FAULT:
-		vgpu_fifo_set_ctx_mmu_error(g, ch);
+		vgpu_fifo_set_ctx_mmu_error_ch_tsg(g, ch);
 		gk20a_channel_abort(ch, false);
 		break;
 	default:
-- 
cgit v1.2.2