gpu: nvgpu: add per-channel refcounting

Add reference counting for channels, and wait for the reference count to reach 0 in gk20a_channel_free() before actually freeing the channel. Also, change free channel tracking a bit by employing a list of free channels, which simplifies the procedure of finding available channels with reference counting. Each use of a channel must have a reference taken before use or held by the caller. Taking a reference on a wild channel pointer may fail if the channel is either not open or in the process of being closed. Also, add safeguards against accidental use of closed channels, specifically by setting ch->g = NULL in channel free. This will make it obvious if an attempt is made to use a freed channel. The last user of a channel might be the deferred interrupt handler, so wait for deferred interrupts to be processed twice in the channel free procedure: once for providing last notifications to the channel and once to make sure there are no stale pointers left after referencing the channel has been denied. Finally, fix some races in the channel and TSG force reset IOCTL path by pausing the channel scheduler in gk20a_fifo_recover_ch() and gk20a_fifo_recover_tsg() while the affected engines are identified, the appropriate MMU faults are triggered, and the MMU faults are handled. In this case, make sure that the MMU fault does not attempt to query the hardware about the failing channel or TSG ids. This should also make channel recovery safer in the regular (i.e., not in the interrupt handler) context. Bug 1530226 Bug 1597493 Bug 1625901 Bug 200076344 Bug 200071810 Change-Id: Ib274876908e18219c64ea41e50ca443df81d957b Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Signed-off-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-on: http://git-master/r/448463 (cherry picked from commit 3f03aeae64ef2af4829e06f5f63062e8ebd21353) Reviewed-on: http://git-master/r/755147 Reviewed-by: Automatic_Commit_Validation_User
author: Konsta Holtta <kholtta@nvidia.com> 2015-03-06 09:33:43 -0500
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2015-06-09 14:13:43 -0400
commit: 6085c90f499c642bc41a646b0efbdfe60e096c74 (patch)
tree: 0eaab99b228ce162ec3a44d0f8138b441f5a64f4 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
parent: a41e5c41cadaa3d030a1f75b09328b8b1a440b69 (diff)
1 files changed, 48 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 8cc852c7..7a707fbd 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
 {
-        struct channel_gk20a *ch20a = priv;
-        gk20a_channel_update(ch20a, nr_completed);
+        struct channel_gk20a *ch = priv;
+        struct gk20a *g = ch->g;
+        /* need busy for possible channel deletion */
+        if (gk20a_busy(ch->g->dev)) {
+                gk20a_err(dev_from_gk20a(ch->g),
+                                "failed to busy while syncpt update");
+                /* Last gk20a_idle()s are in channel_update, so we shouldn't
+                 * get here. If we do, the channel is badly broken now */
+                return;
+        }
+        /* note: channel_get() is in __gk20a_channel_syncpt_incr() */
+        gk20a_channel_update(ch, nr_completed);
+        gk20a_channel_put(ch);
+        gk20a_idle(g->dev);
 }
 static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
@@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
        thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
        if (register_irq) {
-                err = nvhost_intr_register_notifier(sp->host1x_pdev,
-                                sp->id, thresh,
-                                gk20a_channel_syncpt_update, c);
-                /* Adding interrupt action should never fail. A proper error
-                 * handling here would require us to decrement the syncpt max
-                 * back to its original value. */
-                WARN(err, "failed to set submit complete interrupt");
+                err = gk20a_busy(c->g->dev);
+                if (err)
+                        gk20a_err(dev_from_gk20a(c->g),
+                                  "failed to add syncpt interrupt notifier for channel %d",
+                                  c->hw_chid);
+                else {
+                        struct channel_gk20a *referenced = gk20a_channel_get(c);
+                        WARN_ON(!referenced);
+                        gk20a_idle(c->g->dev);
+                        if (referenced) {
+                                /* note: channel_put() is in
+                                 * gk20a_channel_syncpt_update() */
+                                err = nvhost_intr_register_notifier(
+                                        sp->host1x_pdev,
+                                        sp->id, thresh,
+                                        gk20a_channel_syncpt_update, c);
+                                if (err)
+                                        gk20a_channel_put(referenced);
+                                /* Adding interrupt action should
+                                 * never fail. A proper error handling
+                                 * here would require us to decrement
+                                 * the syncpt max back to its original
+                                 * value. */
+                                WARN(err,
+                                     "failed to set submit complete interrupt");
+                        }
+                }
        }
        *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
author	Konsta Holtta <kholtta@nvidia.com>	2015-03-06 09:33:43 -0500
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-06-09 14:13:43 -0400
commit	6085c90f499c642bc41a646b0efbdfe60e096c74 (patch)
tree	0eaab99b228ce162ec3a44d0f8138b441f5a64f4 /drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
parent	a41e5c41cadaa3d030a1f75b09328b8b1a440b69 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 8cc852c7..7a707fbd 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
154		154
155	static void gk20a_channel_syncpt_update(void *priv, int nr_completed)	155	static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
156	{	156	{
157	struct channel_gk20a *ch20a = priv;	157	struct channel_gk20a *ch = priv;
158	gk20a_channel_update(ch20a, nr_completed);	158	struct gk20a *g = ch->g;
		159
		160	/* need busy for possible channel deletion */
		161	if (gk20a_busy(ch->g->dev)) {
		162	gk20a_err(dev_from_gk20a(ch->g),
		163	"failed to busy while syncpt update");
		164	/* Last gk20a_idle()s are in channel_update, so we shouldn't
		165	* get here. If we do, the channel is badly broken now */
		166	return;
		167	}
		168
		169	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
		170	gk20a_channel_update(ch, nr_completed);
		171	gk20a_channel_put(ch);
		172
		173	gk20a_idle(g->dev);
159	}	174	}
160		175
161	static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,	176	static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
@@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
209	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);	224	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
210		225
211	if (register_irq) {	226	if (register_irq) {
212	err = nvhost_intr_register_notifier(sp->host1x_pdev,	227	err = gk20a_busy(c->g->dev);
213	sp->id, thresh,	228	if (err)
214	gk20a_channel_syncpt_update, c);	229	gk20a_err(dev_from_gk20a(c->g),
215		230	"failed to add syncpt interrupt notifier for channel %d",
216	/* Adding interrupt action should never fail. A proper error	231	c->hw_chid);
217	* handling here would require us to decrement the syncpt max	232	else {
218	* back to its original value. */	233	struct channel_gk20a *referenced = gk20a_channel_get(c);
219	WARN(err, "failed to set submit complete interrupt");	234
		235	WARN_ON(!referenced);
		236	gk20a_idle(c->g->dev);
		237
		238	if (referenced) {
		239	/* note: channel_put() is in
		240	* gk20a_channel_syncpt_update() */
		241
		242	err = nvhost_intr_register_notifier(
		243	sp->host1x_pdev,
		244	sp->id, thresh,
		245	gk20a_channel_syncpt_update, c);
		246	if (err)
		247	gk20a_channel_put(referenced);
		248
		249	/* Adding interrupt action should
		250	* never fail. A proper error handling
		251	* here would require us to decrement
		252	* the syncpt max back to its original
		253	* value. */
		254	WARN(err,
		255	"failed to set submit complete interrupt");
		256	}
		257	}
220	}	258	}
221		259
222	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,	260	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,