diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 47 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 |
2 files changed, 41 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4b0d7591..8ac1b276 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5292,14 +5292,17 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g, | |||
5292 | } | 5292 | } |
5293 | 5293 | ||
5294 | /* Used by sw interrupt thread to translate current ctx to chid. | 5294 | /* Used by sw interrupt thread to translate current ctx to chid. |
5295 | * Also used by regops to translate current ctx to chid and tsgid. | ||
5295 | * For performance, we don't want to go through 128 channels every time. | 5296 | * For performance, we don't want to go through 128 channels every time. |
5296 | * curr_ctx should be the value read from gr_fecs_current_ctx_r(). | 5297 | * curr_ctx should be the value read from gr_fecs_current_ctx_r(). |
5297 | * A small tlb is used here to cache translation */ | 5298 | * A small tlb is used here to cache translation */ |
5298 | static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | 5299 | static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, |
5300 | int *curr_tsgid) | ||
5299 | { | 5301 | { |
5300 | struct fifo_gk20a *f = &g->fifo; | 5302 | struct fifo_gk20a *f = &g->fifo; |
5301 | struct gr_gk20a *gr = &g->gr; | 5303 | struct gr_gk20a *gr = &g->gr; |
5302 | u32 chid = -1; | 5304 | u32 chid = -1; |
5305 | int tsgid = NVGPU_INVALID_TSG_ID; | ||
5303 | u32 i; | 5306 | u32 i; |
5304 | 5307 | ||
5305 | /* when contexts are unloaded from GR, the valid bit is reset | 5308 | /* when contexts are unloaded from GR, the valid bit is reset |
@@ -5315,6 +5318,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | |||
5315 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { | 5318 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { |
5316 | if (gr->chid_tlb[i].curr_ctx == curr_ctx) { | 5319 | if (gr->chid_tlb[i].curr_ctx == curr_ctx) { |
5317 | chid = gr->chid_tlb[i].hw_chid; | 5320 | chid = gr->chid_tlb[i].hw_chid; |
5321 | tsgid = gr->chid_tlb[i].tsgid; | ||
5318 | goto unlock; | 5322 | goto unlock; |
5319 | } | 5323 | } |
5320 | } | 5324 | } |
@@ -5324,8 +5328,10 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | |||
5324 | if (f->channel[chid].in_use) { | 5328 | if (f->channel[chid].in_use) { |
5325 | if ((u32)(f->channel[chid].inst_block.cpu_pa >> | 5329 | if ((u32)(f->channel[chid].inst_block.cpu_pa >> |
5326 | ram_in_base_shift_v()) == | 5330 | ram_in_base_shift_v()) == |
5327 | gr_fecs_current_ctx_ptr_v(curr_ctx)) | 5331 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { |
5332 | tsgid = f->channel[chid].tsgid; | ||
5328 | break; | 5333 | break; |
5334 | } | ||
5329 | } | 5335 | } |
5330 | 5336 | ||
5331 | if (chid >= f->num_channels) { | 5337 | if (chid >= f->num_channels) { |
@@ -5338,6 +5344,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | |||
5338 | if (gr->chid_tlb[i].curr_ctx == 0) { | 5344 | if (gr->chid_tlb[i].curr_ctx == 0) { |
5339 | gr->chid_tlb[i].curr_ctx = curr_ctx; | 5345 | gr->chid_tlb[i].curr_ctx = curr_ctx; |
5340 | gr->chid_tlb[i].hw_chid = chid; | 5346 | gr->chid_tlb[i].hw_chid = chid; |
5347 | gr->chid_tlb[i].tsgid = tsgid; | ||
5341 | goto unlock; | 5348 | goto unlock; |
5342 | } | 5349 | } |
5343 | } | 5350 | } |
@@ -5345,6 +5352,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | |||
5345 | /* no free entry, flush one */ | 5352 | /* no free entry, flush one */ |
5346 | gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx; | 5353 | gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx; |
5347 | gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid; | 5354 | gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid; |
5355 | gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid; | ||
5348 | 5356 | ||
5349 | gr->channel_tlb_flush_index = | 5357 | gr->channel_tlb_flush_index = |
5350 | (gr->channel_tlb_flush_index + 1) & | 5358 | (gr->channel_tlb_flush_index + 1) & |
@@ -5352,6 +5360,8 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx) | |||
5352 | 5360 | ||
5353 | unlock: | 5361 | unlock: |
5354 | spin_unlock(&gr->ch_tlb_lock); | 5362 | spin_unlock(&gr->ch_tlb_lock); |
5363 | if (curr_tsgid) | ||
5364 | *curr_tsgid = tsgid; | ||
5355 | return chid; | 5365 | return chid; |
5356 | } | 5366 | } |
5357 | 5367 | ||
@@ -5623,7 +5633,7 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5623 | isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); | 5633 | isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); |
5624 | 5634 | ||
5625 | isr_data.chid = | 5635 | isr_data.chid = |
5626 | gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx); | 5636 | gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL); |
5627 | if (isr_data.chid == -1) { | 5637 | if (isr_data.chid == -1) { |
5628 | gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", | 5638 | gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", |
5629 | isr_data.curr_ctx); | 5639 | isr_data.curr_ctx); |
@@ -6847,6 +6857,31 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
6847 | return -EINVAL; | 6857 | return -EINVAL; |
6848 | } | 6858 | } |
6849 | 6859 | ||
6860 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | ||
6861 | { | ||
6862 | int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid; | ||
6863 | struct gk20a *g = ch->g; | ||
6864 | |||
6865 | curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); | ||
6866 | curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx, | ||
6867 | &curr_gr_tsgid); | ||
6868 | |||
6869 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
6870 | "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" | ||
6871 | " ch->hw_chid=%d", curr_gr_chid, | ||
6872 | curr_gr_tsgid, ch->tsgid, ch->hw_chid); | ||
6873 | |||
6874 | if (curr_gr_chid == -1) | ||
6875 | return false; | ||
6876 | |||
6877 | if (ch->hw_chid == curr_gr_chid) | ||
6878 | return true; | ||
6879 | |||
6880 | if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) | ||
6881 | return true; | ||
6882 | |||
6883 | return false; | ||
6884 | } | ||
6850 | 6885 | ||
6851 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | 6886 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, |
6852 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, | 6887 | struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops, |
@@ -6855,7 +6890,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6855 | struct gk20a *g = ch->g; | 6890 | struct gk20a *g = ch->g; |
6856 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 6891 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
6857 | void *ctx_ptr = NULL; | 6892 | void *ctx_ptr = NULL; |
6858 | int curr_gr_chid, curr_gr_ctx; | ||
6859 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | 6893 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; |
6860 | u32 i, j, offset, v; | 6894 | u32 i, j, offset, v; |
6861 | u32 max_offsets = proj_scal_litter_num_gpcs_v() * | 6895 | u32 max_offsets = proj_scal_litter_num_gpcs_v() * |
@@ -6881,11 +6915,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6881 | 6915 | ||
6882 | restart_gr_ctxsw = true; | 6916 | restart_gr_ctxsw = true; |
6883 | 6917 | ||
6884 | curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); | 6918 | ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch); |
6885 | curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx); | ||
6886 | ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid); | ||
6887 | 6919 | ||
6888 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); | 6920 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx); |
6921 | |||
6889 | if (ch_is_curr_ctx) { | 6922 | if (ch_is_curr_ctx) { |
6890 | for (pass = 0; pass < 2; pass++) { | 6923 | for (pass = 0; pass < 2; pass++) { |
6891 | ctx_op_nr = 0; | 6924 | ctx_op_nr = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 7a4303f7..4b1f6de2 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -93,6 +93,7 @@ enum { | |||
93 | struct gr_channel_map_tlb_entry { | 93 | struct gr_channel_map_tlb_entry { |
94 | u32 curr_ctx; | 94 | u32 curr_ctx; |
95 | u32 hw_chid; | 95 | u32 hw_chid; |
96 | u32 tsgid; | ||
96 | }; | 97 | }; |
97 | 98 | ||
98 | struct gr_zcull_gk20a { | 99 | struct gr_zcull_gk20a { |