author     Konsta Holtta <kholtta@nvidia.com>       2015-03-06 09:33:43 -0500
committer  Terje Bergstrom <tbergstrom@nvidia.com>  2015-06-09 14:13:43 -0400
commit     6085c90f499c642bc41a646b0efbdfe60e096c74 (patch)
tree       0eaab99b228ce162ec3a44d0f8138b441f5a64f4 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     a41e5c41cadaa3d030a1f75b09328b8b1a440b69 (diff)
gpu: nvgpu: add per-channel refcounting
Add reference counting for channels, and wait for the reference count to reach 0 in gk20a_channel_free() before actually freeing the channel.

Also, change free-channel tracking a bit by employing a list of free channels, which simplifies the procedure of finding available channels with reference counting.

Each use of a channel must have a reference taken before use or held by the caller. Taking a reference of a wild channel pointer may fail if the channel is either not opened or in the process of being closed.

Also, add safeguards to protect against accidental use of closed channels; specifically, set ch->g = NULL in channel free. This makes it obvious if a freed channel is attempted to be used.

The last user of a channel might be the deferred interrupt handler, so wait for deferred interrupts to be processed twice in the channel free procedure: once to provide the last notifications to the channel and once to make sure there are no stale pointers left after references to the channel have been denied.

Finally, fix some races in the channel and TSG force reset IOCTL path by pausing the channel scheduler in gk20a_fifo_recover_ch() and gk20a_fifo_recover_tsg() while the affected engines are identified, the appropriate MMU faults triggered, and the MMU faults handled. In this case, make sure that the MMU fault handling does not attempt to query the hardware about the failing channel or TSG ids. This should make channel recovery safer also in the regular (i.e., not in the interrupt handler) context.

Bug 1530226
Bug 1597493
Bug 1625901
Bug 200076344
Bug 200071810

Change-Id: Ib274876908e18219c64ea41e50ca443df81d957b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/448463
(cherry picked from commit 3f03aeae64ef2af4829e06f5f63062e8ebd21353)
Reviewed-on: http://git-master/r/755147
Reviewed-by: Automatic_Commit_Validation_User
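The calling convention used throughout the diff below can be summarized as a get/put pair. The following is a minimal, illustrative sketch only: gk20a_channel_get(), gk20a_channel_put(), struct channel_gk20a and the fifo channel array are taken from this change, while the surrounding caller is hypothetical.

/* Sketch of the per-channel refcounting contract introduced by this patch.
 * The caller below is hypothetical; only gk20a_channel_get()/_put() and the
 * fifo channel array come from this change.
 */
static void example_channel_user(struct fifo_gk20a *f, u32 chid)
{
	/* Take a reference before touching the channel. The get returns
	 * NULL if the channel is not open or is in the process of being
	 * closed, so the caller must check the result. */
	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[chid]);

	if (!ch)
		return;

	/* ... the channel cannot be freed while this reference is held ... */

	/* Every successful get must be balanced by a put; the channel is
	 * only actually freed once the count drops to zero. */
	gk20a_channel_put(ch);
}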
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  93
1 file changed, 60 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b2fea5b8..edd4c6c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
  * Also used by regops to translate current ctx to chid and tsgid.
  * For performance, we don't want to go through 128 channels every time.
  * curr_ctx should be the value read from gr_fecs_current_ctx_r().
- * A small tlb is used here to cache translation */
-static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
-		int *curr_tsgid)
+ * A small tlb is used here to cache translation.
+ *
+ * Returned channel must be freed with gk20a_channel_put() */
+static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
+		struct gk20a *g, u32 curr_ctx, int *curr_tsgid)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	struct gr_gk20a *gr = &g->gr;
 	u32 chid = -1;
 	int tsgid = NVGPU_INVALID_TSG_ID;
 	u32 i;
+	struct channel_gk20a *ret = NULL;
 
 	/* when contexts are unloaded from GR, the valid bit is reset
 	 * but the instance pointer information remains intact. So the
 	 * valid bit must be checked to be absolutely certain that a
 	 * valid context is currently resident. */
 	if (!gr_fecs_current_ctx_valid_v(curr_ctx))
-		return -1;
+		return NULL;
 
 	spin_lock(&gr->ch_tlb_lock);
 
@@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
 		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
 			chid = gr->chid_tlb[i].hw_chid;
 			tsgid = gr->chid_tlb[i].tsgid;
+			ret = gk20a_channel_get(&f->channel[chid]);
 			goto unlock;
 		}
 	}
 
 	/* slow path */
-	for (chid = 0; chid < f->num_channels; chid++)
-		if (f->channel[chid].in_use) {
-			if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >>
-				ram_in_base_shift_v()) ==
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (!gk20a_channel_get(ch))
+			continue;
+
+		if ((u32)(gk20a_mem_phys(&ch->inst_block) >>
+				ram_in_base_shift_v()) ==
 				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
-				tsgid = f->channel[chid].tsgid;
-				break;
-			}
+			tsgid = ch->tsgid;
+			/* found it */
+			ret = ch;
+			break;
+		}
+		gk20a_channel_put(ch);
 	}
 
-	if (chid >= f->num_channels) {
-		chid = -1;
+	if (!ret)
 		goto unlock;
-	}
 
 	/* add to free tlb entry */
 	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5205,7 +5213,7 @@ unlock:
 	spin_unlock(&gr->ch_tlb_lock);
 	if (curr_tsgid)
 		*curr_tsgid = tsgid;
-	return chid;
+	return ret;
 }
 
 int gk20a_gr_lock_down_sm(struct gk20a *g,
@@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g)
 	u32 obj_table;
 	int need_reset = 0;
 	u32 gr_intr = gk20a_readl(g, gr_intr_r());
+	struct channel_gk20a *ch = NULL;
 
 	gk20a_dbg_fn("");
 	gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g)
 			gr_fe_object_table_r(isr_data.sub_chan)) : 0;
 	isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
 
-	isr_data.chid =
-		gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL);
-	if (isr_data.chid == -1) {
+	ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL);
+	if (!ch) {
 		gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
 			   isr_data.curr_ctx);
 		goto clean_up;
 	}
+	isr_data.chid = ch->hw_chid;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		"channel %d: addr 0x%08x, "
@@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g)
 
 	if (gr_intr & gr_intr_exception_pending_f()) {
 		u32 exception = gk20a_readl(g, gr_exception_r());
-		struct fifo_gk20a *f = &g->fifo;
-		struct channel_gk20a *ch = &f->channel[isr_data.chid];
 
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
 
@@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g)
 	}
 
 	if (need_reset)
-		gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);
+		gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
+				   ~(u32)0, false, true);
 
 clean_up:
+	if (gr_intr && !ch) {
+		/* Clear interrupts for unused channel. This is
+		   probably an interrupt during gk20a_free_channel() */
+		gk20a_err(dev_from_gk20a(g),
+			  "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing",
+			  gr_intr);
+		gk20a_writel(g, gr_intr_r(), gr_intr);
+		gr_intr = 0;
+	}
+
 	gk20a_writel(g, gr_gpfifo_ctl_r(),
 		grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
 		gr_gpfifo_ctl_semaphore_access_f(1));
@@ -5583,6 +5601,9 @@ clean_up:
 		gk20a_err(dev_from_gk20a(g),
 			   "unhandled gr interrupt 0x%08x", gr_intr);
 
+	if (ch)
+		gk20a_channel_put(ch);
+
 	return 0;
 }
 
@@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 {
-	int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid;
+	int curr_gr_ctx, curr_gr_tsgid;
 	struct gk20a *g = ch->g;
+	struct channel_gk20a *curr_ch;
+	bool ret = false;
 
 	curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
-	curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx,
-			&curr_gr_tsgid);
+	curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
+			&curr_gr_tsgid);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
 		  "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
-		  " ch->hw_chid=%d", curr_gr_chid,
-		  curr_gr_tsgid, ch->tsgid, ch->hw_chid);
-
-	if (curr_gr_chid == -1)
+		  " ch->hw_chid=%d",
+		  curr_ch ? curr_ch->hw_chid : -1,
+		  curr_gr_tsgid,
+		  ch->tsgid,
+		  ch->hw_chid);
+
+	if (!curr_ch)
 		return false;
 
-	if (ch->hw_chid == curr_gr_chid)
-		return true;
+	if (ch->hw_chid == curr_ch->hw_chid)
+		ret = true;
 
 	if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
-		return true;
+		ret = true;
 
-	return false;
+	gk20a_channel_put(curr_ch);
+	return ret;
 }
 
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,