author     Konsta Holtta <kholtta@nvidia.com>       2015-03-06 09:33:43 -0500
committer  Terje Bergstrom <tbergstrom@nvidia.com>  2015-06-09 14:13:43 -0400
commit     6085c90f499c642bc41a646b0efbdfe60e096c74 (patch)
tree       0eaab99b228ce162ec3a44d0f8138b441f5a64f4 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     a41e5c41cadaa3d030a1f75b09328b8b1a440b69 (diff)
gpu: nvgpu: add per-channel refcounting
Add reference counting for channels, and wait for the reference count to reach 0 in gk20a_channel_free() before actually freeing the channel.

Also, change free-channel tracking a bit by employing a list of free channels, which simplifies the procedure of finding available channels with reference counting.

Each use of a channel must have a reference taken before use or held by the caller. Taking a reference of a wild channel pointer may fail if the channel is either not opened or in the process of being closed.

Also, add safeguards to protect against accidental use of closed channels; specifically, set ch->g = NULL in channel free. This makes it obvious if a freed channel is attempted to be used.

The last user of a channel might be the deferred interrupt handler, so wait for deferred interrupts to be processed twice in the channel free procedure: once to provide the last notifications to the channel and once to make sure there are no stale pointers left after references to the channel have been denied.

Finally, fix some races in the channel and TSG force reset IOCTL path by pausing the channel scheduler in gk20a_fifo_recover_ch() and gk20a_fifo_recover_tsg() while the affected engines are identified, the appropriate MMU faults triggered, and the MMU faults handled. In this case, make sure that the MMU fault handling does not attempt to query the hardware about the failing channel or TSG ids. This should make channel recovery safer also in the regular (i.e., not in the interrupt handler) context.

Bug 1530226
Bug 1597493
Bug 1625901
Bug 200076344
Bug 200071810

Change-Id: Ib274876908e18219c64ea41e50ca443df81d957b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/448463
(cherry picked from commit 3f03aeae64ef2af4829e06f5f63062e8ebd21353)
Reviewed-on: http://git-master/r/755147
Reviewed-by: Automatic_Commit_Validation_User
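The calling convention used throughout the diff below can be summarized as a get/put pair. The following is a minimal, illustrative sketch only: gk20a_channel_get(), gk20a_channel_put(), struct channel_gk20a and the fifo channel array are taken from this change, while the surrounding caller is hypothetical.

/* Sketch of the per-channel refcounting contract introduced by this patch.
 * The caller below is hypothetical; only gk20a_channel_get()/_put() and the
 * fifo channel array come from this change.
 */
static void example_channel_user(struct fifo_gk20a *f, u32 chid)
{
	/* Take a reference before touching the channel. The get returns
	 * NULL if the channel is not open or is in the process of being
	 * closed, so the caller must check the result. */
	struct channel_gk20a *ch = gk20a_channel_get(&f->channel[chid]);

	if (!ch)
		return;

	/* ... the channel cannot be freed while this reference is held ... */

	/* Every successful get must be balanced by a put; the channel is
	 * only actually freed once the count drops to zero. */
	gk20a_channel_put(ch);
}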
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  93
1 file changed, 60 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b2fea5b8..edd4c6c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
  * Also used by regops to translate current ctx to chid and tsgid.
  * For performance, we don't want to go through 128 channels every time.
  * curr_ctx should be the value read from gr_fecs_current_ctx_r().
- * A small tlb is used here to cache translation */
-static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
-		int *curr_tsgid)
+ * A small tlb is used here to cache translation.
+ *
+ * Returned channel must be freed with gk20a_channel_put() */
+static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
+		struct gk20a *g, u32 curr_ctx, int *curr_tsgid)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	struct gr_gk20a *gr = &g->gr;
 	u32 chid = -1;
 	int tsgid = NVGPU_INVALID_TSG_ID;
 	u32 i;
+	struct channel_gk20a *ret = NULL;
 
 	/* when contexts are unloaded from GR, the valid bit is reset
 	 * but the instance pointer information remains intact. So the
 	 * valid bit must be checked to be absolutely certain that a
 	 * valid context is currently resident. */
 	if (!gr_fecs_current_ctx_valid_v(curr_ctx))
-		return -1;
+		return NULL;
 
 	spin_lock(&gr->ch_tlb_lock);
 
@@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
 		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
 			chid = gr->chid_tlb[i].hw_chid;
 			tsgid = gr->chid_tlb[i].tsgid;
+			ret = gk20a_channel_get(&f->channel[chid]);
 			goto unlock;
 		}
 	}
 
 	/* slow path */
-	for (chid = 0; chid < f->num_channels; chid++)
-		if (f->channel[chid].in_use) {
-			if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >>
-				ram_in_base_shift_v()) ==
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (!gk20a_channel_get(ch))
+			continue;
+
+		if ((u32)(gk20a_mem_phys(&ch->inst_block) >>
+				ram_in_base_shift_v()) ==
 				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
-				tsgid = f->channel[chid].tsgid;
-				break;
-			}
+			tsgid = ch->tsgid;
+			/* found it */
+			ret = ch;
+			break;
+		}
+		gk20a_channel_put(ch);
 	}
 
-	if (chid >= f->num_channels) {
-		chid = -1;
+	if (!ret)
 		goto unlock;
-	}
 
 	/* add to free tlb entry */
 	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5205,7 +5213,7 @@ unlock:
 	spin_unlock(&gr->ch_tlb_lock);
 	if (curr_tsgid)
 		*curr_tsgid = tsgid;
-	return chid;
+	return ret;
 }
 
 int gk20a_gr_lock_down_sm(struct gk20a *g,
@@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g)
 	u32 obj_table;
 	int need_reset = 0;
 	u32 gr_intr = gk20a_readl(g, gr_intr_r());
+	struct channel_gk20a *ch = NULL;
 
 	gk20a_dbg_fn("");
 	gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g)
 			gr_fe_object_table_r(isr_data.sub_chan)) : 0;
 	isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
 
-	isr_data.chid =
-		gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL);
-	if (isr_data.chid == -1) {
+	ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL);
+	if (!ch) {
 		gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
 			   isr_data.curr_ctx);
 		goto clean_up;
 	}
+	isr_data.chid = ch->hw_chid;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 		"channel %d: addr 0x%08x, "
@@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g)
 
 	if (gr_intr & gr_intr_exception_pending_f()) {
 		u32 exception = gk20a_readl(g, gr_exception_r());
-		struct fifo_gk20a *f = &g->fifo;
-		struct channel_gk20a *ch = &f->channel[isr_data.chid];
 
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
 
@@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g)
 	}
 
 	if (need_reset)
-		gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);
+		gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
+				   ~(u32)0, false, true);
 
 clean_up:
+	if (gr_intr && !ch) {
+		/* Clear interrupts for unused channel. This is
+		   probably an interrupt during gk20a_free_channel() */
+		gk20a_err(dev_from_gk20a(g),
+			  "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing",
+			  gr_intr);
+		gk20a_writel(g, gr_intr_r(), gr_intr);
+		gr_intr = 0;
+	}
+
 	gk20a_writel(g, gr_gpfifo_ctl_r(),
 		grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
 		gr_gpfifo_ctl_semaphore_access_f(1));
@@ -5583,6 +5601,9 @@ clean_up:
 		gk20a_err(dev_from_gk20a(g),
 			   "unhandled gr interrupt 0x%08x", gr_intr);
 
+	if (ch)
+		gk20a_channel_put(ch);
+
 	return 0;
 }
 
@@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 {
-	int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid;
+	int curr_gr_ctx, curr_gr_tsgid;
 	struct gk20a *g = ch->g;
+	struct channel_gk20a *curr_ch;
+	bool ret = false;
 
 	curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
-	curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx,
-			&curr_gr_tsgid);
+	curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
+			&curr_gr_tsgid);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
 		  "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
-		  " ch->hw_chid=%d", curr_gr_chid,
-		  curr_gr_tsgid, ch->tsgid, ch->hw_chid);
-
-	if (curr_gr_chid == -1)
+		  " ch->hw_chid=%d",
+		  curr_ch ? curr_ch->hw_chid : -1,
+		  curr_gr_tsgid,
+		  ch->tsgid,
+		  ch->hw_chid);
+
+	if (!curr_ch)
 		return false;
 
-	if (ch->hw_chid == curr_gr_chid)
-		return true;
+	if (ch->hw_chid == curr_ch->hw_chid)
+		ret = true;
 
 	if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
-		return true;
+		ret = true;
 
-	return false;
+	gk20a_channel_put(curr_ch);
+	return ret;
 }
 
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,