From 3794afbeb177ed0932d166d30bb2af9d9859dff9 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Tue, 13 Nov 2018 15:36:19 +0200
Subject: gpu: nvgpu: add safe channel id lookup

Add gk20a_channel_from_id() to retrieve a channel, given a raw channel
ID, with a reference taken (or NULL if the channel was dead). This makes
it harder to mistakenly use a channel that's dead and thus uncovers bugs
sooner. Convert code to use the new lookup when applicable; work remains
to convert complex uses where a ref should have been taken but hasn't.

The channel ID is also validated against FIFO_INVAL_CHANNEL_ID; NULL is
returned for such IDs. This is often useful and does not hurt when
unnecessary.

However, this does not prevent the case where a channel would be closed
and reopened again when someone would hold a stale channel number. In
all such conditions the caller should hold a reference already.

The only conditions where a channel can be safely looked up by an id and
used without taking a ref are when initializing or deinitializing the
list of channels.

Jira NVGPU-1460

Change-Id: I0a30968d17c1e0784d315a676bbe69c03a73481c
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1955400
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit 7df3d587502c2de997dfbe8ea8ddc114d0a0481e
in dev-kernel)
Reviewed-on: https://git-master.nvidia.com/r/2008515
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/channel.c   | 50 ++++++++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c      | 18 +++++------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c        |  7 +++--
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c        |  9 +++---
 drivers/gpu/nvgpu/include/nvgpu/channel.h |  5 ++++
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c        |  3 +-
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c          |  4 +--
 drivers/gpu/nvgpu/vgpu/vgpu.c             |  2 +-
 8 files changed, 59 insertions(+), 39 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 54fa4747..45c02a75 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -626,6 +626,18 @@ void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
 	WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable);
 }
 
+struct channel_gk20a *_gk20a_channel_from_id(struct gk20a *g, u32 chid,
+					 const char *caller)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (chid == FIFO_INVAL_CHANNEL_ID) {
+		return NULL;
+	}
+
+	return _gk20a_channel_get(&g->fifo.channel[chid], caller);
+}
+
 void gk20a_channel_close(struct channel_gk20a *ch)
 {
 	gk20a_free_channel(ch, false);
@@ -1450,11 +1462,10 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
-		if (gk20a_channel_get(ch) == NULL) {
+		if (ch == NULL)
 			continue;
-		}
 
 		nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 		if (ch->timeout.running) {
@@ -1553,9 +1564,9 @@ static void gk20a_channel_poll_timeouts(struct gk20a *g)
 
 
 	for (chid = 0; chid < g->fifo.num_channels; chid++) {
-		struct channel_gk20a *ch = &g->fifo.channel[chid];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
-		if (gk20a_channel_get(ch)) {
+		if (ch != NULL) {
 			gk20a_channel_timeout_check(ch);
 			gk20a_channel_put(ch);
 		}
@@ -2107,9 +2118,9 @@ void gk20a_channel_deterministic_idle(struct gk20a *g)
 	nvgpu_rwsem_down_write(&g->deterministic_busy);
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
-		if (gk20a_channel_get(ch) == NULL) {
+		if (ch == NULL) {
 			continue;
 		}
 
@@ -2145,9 +2156,9 @@ void gk20a_channel_deterministic_unidle(struct gk20a *g)
 	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
-		if (gk20a_channel_get(ch) == NULL) {
+		if (ch == NULL) {
 			continue;
 		}
 
@@ -2256,8 +2267,9 @@ int gk20a_channel_suspend(struct gk20a *g)
 	nvgpu_log_fn(g, " ");
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
-		if (gk20a_channel_get(ch) != NULL) {
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
+
+		if (ch != NULL) {
 			nvgpu_log_info(g, "suspend channel %d", chid);
 			/* disable channel */
 			gk20a_disable_channel_tsg(g, ch);
@@ -2280,9 +2292,11 @@ int gk20a_channel_suspend(struct gk20a *g)
 		gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true);
 
 		for (chid = 0; chid < f->num_channels; chid++) {
-			if (gk20a_channel_get(&f->channel[chid])) {
-				g->ops.fifo.unbind_channel(&f->channel[chid]);
-				gk20a_channel_put(&f->channel[chid]);
+			struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
+
+			if (ch != NULL) {
+				g->ops.fifo.unbind_channel(ch);
+				gk20a_channel_put(ch);
 			}
 		}
 	}
@@ -2301,12 +2315,14 @@ int gk20a_channel_resume(struct gk20a *g)
 	nvgpu_log_fn(g, " ");
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		if (gk20a_channel_get(&f->channel[chid])) {
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
+
+		if (ch != NULL) {
 			nvgpu_log_info(g, "resume channel %d", chid);
-			g->ops.fifo.bind_channel(&f->channel[chid]);
+			g->ops.fifo.bind_channel(ch);
 			channels_in_use = true;
 			active_runlist_ids |= BIT(f->channel[chid].runlist_id);
-			gk20a_channel_put(&f->channel[chid]);
+			gk20a_channel_put(ch);
 		}
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 9691d51b..f4a4591d 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1172,7 +1172,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
 		struct channel_gk20a *ch;
 		u64 ch_inst_ptr;
 
-		ch = gk20a_channel_get(&f->channel[ci]);
+		ch = gk20a_channel_from_id(g, ci);
 		/* only alive channels are searched */
 		if (!ch) {
 			continue;
@@ -1959,9 +1959,9 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose, int rc_type)
 		gk20a_fifo_recover(g, engines, chid, false, true, verbose,
 					rc_type);
 	} else {
-		struct channel_gk20a *ch = &g->fifo.channel[chid];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
-		if (gk20a_channel_get(ch)) {
+		if (ch != NULL) {
 			gk20a_channel_abort(ch, false);
 
 			if (gk20a_fifo_error_ch(g, ch)) {
@@ -2710,9 +2710,9 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g,
 	id = fifo_pbdma_status_id_v(status);
 	if (fifo_pbdma_status_id_type_v(status)
 			== fifo_pbdma_status_id_type_chid_v()) {
-		struct channel_gk20a *ch = &f->channel[id];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
 
-		if (gk20a_channel_get(ch)) {
+		if (ch != NULL) {
 			g->ops.fifo.set_error_notifier(ch, error_notifier);
 			gk20a_fifo_recover_ch(g, id, true, RC_TYPE_PBDMA_FAULT);
 			gk20a_channel_put(ch);
@@ -2924,12 +2924,12 @@ void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, u32 id,
 		gk20a_fifo_recover_tsg(g, id, true,
 						RC_TYPE_PREEMPT_TIMEOUT);
 	} else {
-		struct channel_gk20a *ch = &g->fifo.channel[id];
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
 
 		nvgpu_err(g,
 			"preempt channel %d timeout", id);
 
-		if (gk20a_channel_get(ch)) {
+		if (ch != NULL) {
 			g->ops.fifo.set_error_notifier(ch,
 					NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 			gk20a_fifo_recover_ch(g, id, true,
@@ -4031,8 +4031,8 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
 	}
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
-		if (gk20a_channel_get(ch)) {
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
+		if (ch != NULL) {
 			ch_state[chid] =
 				nvgpu_kmalloc(g, sizeof(struct ch_state) +
 					ram_in_alloc_size_v());
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 8cb66279..928d80cb 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5581,15 +5581,16 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
 		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
 			chid = gr->chid_tlb[i].chid;
 			tsgid = gr->chid_tlb[i].tsgid;
-			ret = gk20a_channel_get(&f->channel[chid]);
+			ret = gk20a_channel_from_id(g, chid);
 			goto unlock;
 		}
 	}
 
 	/* slow path */
 	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
-		if (gk20a_channel_get(ch) == NULL) {
+		struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
+
+		if (ch == NULL) {
 			continue;
 		}
 
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index f70a5a00..a3655146 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1961,8 +1961,8 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
 
 	chid = g->gr.cilp_preempt_pending_chid;
 
-	ch = gk20a_channel_get(gk20a_fifo_channel_from_chid(g, chid));
-	if (!ch) {
+	ch = gk20a_channel_from_id(g, chid);
+	if (ch == NULL) {
 		return ret;
 	}
 
@@ -2014,9 +2014,8 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
 			goto clean_up;
 		}
 
-		ch = gk20a_channel_get(
-				gk20a_fifo_channel_from_chid(g, chid));
-		if (!ch) {
+		ch = gk20a_channel_from_id(g, chid);
+		if (ch == NULL) {
 			goto clean_up;
 		}
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 6cdcb973..1851b9e2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -393,6 +393,11 @@ struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
 #define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
 
+/* returns NULL if could not take a ref to the channel */
+struct channel_gk20a *__must_check _gk20a_channel_from_id(struct gk20a *g,
+		u32 chid, const char *caller);
+#define gk20a_channel_from_id(g, chid) _gk20a_channel_from_id(g, chid, __func__)
+
 int gk20a_wait_channel_idle(struct channel_gk20a *ch);
 
 /* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index aa5abec9..8821e799 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -721,8 +721,7 @@ static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g,
 
 int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
 {
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]);
+	struct channel_gk20a *ch = gk20a_channel_from_id(g, info->chid);
 
 	nvgpu_log_fn(g, " ");
 	if (!ch)
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 9fafa52f..6a86c9a0 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -953,10 +953,10 @@ int vgpu_init_gr_support(struct gk20a *g)
 
 int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
 {
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]);
+	struct channel_gk20a *ch = gk20a_channel_from_id(g, info->chid);
 
 	nvgpu_log_fn(g, " ");
+
 	if (!ch)
 		return 0;
 
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 266b801e..07361afe 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -119,7 +119,7 @@ static void vgpu_handle_channel_event(struct gk20a *g,
 
 static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid)
 {
-	struct channel_gk20a *ch = gk20a_channel_get(&g->fifo.channel[chid]);
+	struct channel_gk20a *ch = gk20a_channel_from_id(g, chid);
 
 	if (ch == NULL) {
 		nvgpu_err(g, "invalid channel id %d", chid);
-- 
cgit v1.2.2