author    Mayank Kaushik <mkaushik@nvidia.com>  2014-09-26 16:51:55 -0400
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:11:57 -0400
commit    50d76e9b91a4b4b03bea5f92a7a1af452ce7c6f9 (patch)
tree      7f778472acb105c0e54e9fd00cb086dce5acf0d6 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent    1ee103adf30c14efef932faedc6b1bb226f7139b (diff)
gpu: nvgpu: gk20a: regops: resident channel check
Fix the code that checks if the channel passed in for regops is
resident by also accounting for the TSG id, if the channel is part of
a TSG.

Change-Id: I449344e2887a4de4d55122f4aae5d3d4efabf725
Signed-off-by: Mayank Kaushik <mkaushik@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 47
1 file changed, 40 insertions(+), 7 deletions(-)
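The gist of the fix is easiest to see in isolation: previously a channel
counted as resident only when its own hw_chid matched the context that GR
currently has loaded, so a channel in a TSG whose context was loaded via a
sibling channel was wrongly reported as not resident. Below is a minimal
userspace sketch of the old check versus the new one; the types and values
are simplified stand-ins, not the real nvgpu structures.

#include <stdbool.h>
#include <stdio.h>

#define INVALID_TSG_ID (-1) /* stand-in for NVGPU_INVALID_TSG_ID */

struct channel {
	int hw_chid; /* hardware channel id */
	int tsgid;   /* INVALID_TSG_ID when the channel is not in a TSG */
};

/* Old check: resident only if this exact channel owns the current ctx. */
static bool is_resident_old(const struct channel *ch, int curr_chid)
{
	return curr_chid != -1 && ch->hw_chid == curr_chid;
}

/* New check: the channel is also resident when the current ctx belongs
 * to another channel in the same TSG. */
static bool is_resident_new(const struct channel *ch,
			    int curr_chid, int curr_tsgid)
{
	if (curr_chid == -1)
		return false;
	if (ch->hw_chid == curr_chid)
		return true;
	return ch->tsgid != INVALID_TSG_ID && ch->tsgid == curr_tsgid;
}

int main(void)
{
	/* Channel 5 shares TSG 2 with the currently resident channel 7. */
	struct channel ch = { .hw_chid = 5, .tsgid = 2 };

	printf("old: %d\n", is_resident_old(&ch, 7));    /* 0: missed */
	printf("new: %d\n", is_resident_new(&ch, 7, 2)); /* 1: resident via TSG */
	return 0;
}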
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4b0d7591..8ac1b276 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5292,14 +5292,17 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
 }
 
 /* Used by sw interrupt thread to translate current ctx to chid.
+ * Also used by regops to translate current ctx to chid and tsgid.
  * For performance, we don't want to go through 128 channels every time.
  * curr_ctx should be the value read from gr_fecs_current_ctx_r().
  * A small tlb is used here to cache translation */
-static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
+static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
+		int *curr_tsgid)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	struct gr_gk20a *gr = &g->gr;
 	u32 chid = -1;
+	int tsgid = NVGPU_INVALID_TSG_ID;
 	u32 i;
 
 	/* when contexts are unloaded from GR, the valid bit is reset
@@ -5315,6 +5318,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
 		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
 			chid = gr->chid_tlb[i].hw_chid;
+			tsgid = gr->chid_tlb[i].tsgid;
 			goto unlock;
 		}
 	}
@@ -5324,8 +5328,10 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 		if (f->channel[chid].in_use) {
 			if ((u32)(f->channel[chid].inst_block.cpu_pa >>
 				ram_in_base_shift_v()) ==
-				gr_fecs_current_ctx_ptr_v(curr_ctx))
+				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
+				tsgid = f->channel[chid].tsgid;
 				break;
+			}
 		}
 
 	if (chid >= f->num_channels) {
@@ -5338,6 +5344,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 		if (gr->chid_tlb[i].curr_ctx == 0) {
 			gr->chid_tlb[i].curr_ctx = curr_ctx;
 			gr->chid_tlb[i].hw_chid = chid;
+			gr->chid_tlb[i].tsgid = tsgid;
 			goto unlock;
 		}
 	}
@@ -5345,6 +5352,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 	/* no free entry, flush one */
 	gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
 	gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
+	gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;
 
 	gr->channel_tlb_flush_index =
 		(gr->channel_tlb_flush_index + 1) &
@@ -5352,6 +5360,8 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 
 unlock:
 	spin_unlock(&gr->ch_tlb_lock);
+	if (curr_tsgid)
+		*curr_tsgid = tsgid;
 	return chid;
 }
 
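The translation above is cached in a small software TLB so the driver does
not rescan all channels on every lookup; this change widens each entry from
(curr_ctx, hw_chid) to also carry the tsgid. A sketch of the caching
pattern, with a hypothetical 4-entry table (the real GR_CHANNEL_MAP_TLB_SIZE
may differ) and the same free-slot-then-round-robin eviction:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TLB_SIZE 4 /* hypothetical; a power of two so the mask below works */

struct tlb_entry {
	uint32_t curr_ctx; /* 0 marks a free slot, as in the driver */
	uint32_t hw_chid;
	int      tsgid;    /* cached alongside the chid by this change */
};

static struct tlb_entry tlb[TLB_SIZE];
static unsigned int flush_index;

/* Returns true on a hit and fills *hw_chid / *tsgid. */
static bool tlb_lookup(uint32_t curr_ctx, uint32_t *hw_chid, int *tsgid)
{
	unsigned int i;

	for (i = 0; i < TLB_SIZE; i++) {
		if (tlb[i].curr_ctx == curr_ctx) {
			*hw_chid = tlb[i].hw_chid;
			*tsgid = tlb[i].tsgid;
			return true;
		}
	}
	return false;
}

static void tlb_insert(uint32_t curr_ctx, uint32_t hw_chid, int tsgid)
{
	unsigned int i;

	/* Prefer a free slot. */
	for (i = 0; i < TLB_SIZE; i++) {
		if (tlb[i].curr_ctx == 0) {
			tlb[i] = (struct tlb_entry){ curr_ctx, hw_chid, tsgid };
			return;
		}
	}

	/* No free entry: evict round-robin, mirroring channel_tlb_flush_index. */
	tlb[flush_index] = (struct tlb_entry){ curr_ctx, hw_chid, tsgid };
	flush_index = (flush_index + 1) & (TLB_SIZE - 1);
}

int main(void)
{
	uint32_t chid;
	int tsgid;

	tlb_insert(0xdead, 5, 2);
	if (tlb_lookup(0xdead, &chid, &tsgid))
		printf("hit: chid=%u tsgid=%d\n", chid, tsgid);
	return 0;
}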
@@ -5623,7 +5633,7 @@ int gk20a_gr_isr(struct gk20a *g)
 		isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
 
 		isr_data.chid =
-			gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
+			gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL);
 		if (isr_data.chid == -1) {
 			gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
 				isr_data.curr_ctx);
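Note the new third argument: the tsgid is now reported through an optional
out-parameter, so the interrupt path, which only needs the chid, opts out
by passing NULL. A stand-in sketch of that convention (names and values are
illustrative, not the driver's):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in lookup: returns the chid and, only if the caller asked for it,
 * the tsgid through the optional out-parameter. */
static int get_chid_from_ctx(uint32_t curr_ctx, int *curr_tsgid)
{
	int chid = 7, tsgid = 2; /* pretend translation results */

	(void)curr_ctx;
	if (curr_tsgid) /* callers may pass NULL when the tsgid is unneeded */
		*curr_tsgid = tsgid;
	return chid;
}

int main(void)
{
	int chid, tsgid;

	/* ISR-style caller: only the chid matters. */
	chid = get_chid_from_ctx(0x1234, NULL);
	printf("isr: chid=%d\n", chid);

	/* regops-style caller: needs both. */
	chid = get_chid_from_ctx(0x1234, &tsgid);
	printf("regops: chid=%d tsgid=%d\n", chid, tsgid);
	return 0;
}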
@@ -6847,6 +6857,31 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 	return -EINVAL;
 }
 
+bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
+{
+	int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid;
+	struct gk20a *g = ch->g;
+
+	curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
+	curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx,
+						  &curr_gr_tsgid);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+		  "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
+		  " ch->hw_chid=%d", curr_gr_chid,
+		  curr_gr_tsgid, ch->tsgid, ch->hw_chid);
+
+	if (curr_gr_chid == -1)
+		return false;
+
+	if (ch->hw_chid == curr_gr_chid)
+		return true;
+
+	if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
+		return true;
+
+	return false;
+}
 
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
@@ -6855,7 +6890,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	void *ctx_ptr = NULL;
-	int curr_gr_chid, curr_gr_ctx;
 	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	u32 max_offsets = proj_scal_litter_num_gpcs_v() *
@@ -6881,11 +6915,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
 	restart_gr_ctxsw = true;
 
-	curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
-	curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx);
-	ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid);
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
+
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;