From 50d76e9b91a4b4b03bea5f92a7a1af452ce7c6f9 Mon Sep 17 00:00:00 2001
From: Mayank Kaushik
Date: Fri, 26 Sep 2014 13:51:55 -0700
Subject: gpu: nvgpu: gk20a: regops: resident channel check

Fix the code that checks if the channel passed in for regops is
resident by also accounting for the TSG id, if the channel is part
of a TSG.

Change-Id: I449344e2887a4de4d55122f4aae5d3d4efabf725
Signed-off-by: Mayank Kaushik
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 47 ++++++++++++++++++++++++++++++++------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  1 +
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4b0d7591..8ac1b276 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5292,14 +5292,17 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
 }
 
 /* Used by sw interrupt thread to translate current ctx to chid.
+ * Also used by regops to translate current ctx to chid and tsgid.
  * For performance, we don't want to go through 128 channels every time.
  * curr_ctx should be the value read from gr_fecs_current_ctx_r().
  * A small tlb is used here to cache translation */
-static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
+static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
+                                      int *curr_tsgid)
 {
         struct fifo_gk20a *f = &g->fifo;
         struct gr_gk20a *gr = &g->gr;
         u32 chid = -1;
+        int tsgid = NVGPU_INVALID_TSG_ID;
         u32 i;
 
         /* when contexts are unloaded from GR, the valid bit is reset
@@ -5315,6 +5318,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
         for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
                 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
                         chid = gr->chid_tlb[i].hw_chid;
+                        tsgid = gr->chid_tlb[i].tsgid;
                         goto unlock;
                 }
         }
@@ -5324,8 +5328,10 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
                 if (f->channel[chid].in_use) {
                         if ((u32)(f->channel[chid].inst_block.cpu_pa >>
                                 ram_in_base_shift_v()) ==
-                                gr_fecs_current_ctx_ptr_v(curr_ctx))
+                                gr_fecs_current_ctx_ptr_v(curr_ctx)) {
+                                tsgid = f->channel[chid].tsgid;
                                 break;
+                        }
                 }
 
         if (chid >= f->num_channels) {
@@ -5338,6 +5344,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
                 if (gr->chid_tlb[i].curr_ctx == 0) {
                         gr->chid_tlb[i].curr_ctx = curr_ctx;
                         gr->chid_tlb[i].hw_chid = chid;
+                        gr->chid_tlb[i].tsgid = tsgid;
                         goto unlock;
                 }
         }
@@ -5345,6 +5352,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
         /* no free entry, flush one */
         gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
         gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
+        gr->chid_tlb[gr->channel_tlb_flush_index].tsgid = tsgid;
 
         gr->channel_tlb_flush_index =
                 (gr->channel_tlb_flush_index + 1) &
@@ -5352,6 +5360,8 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 
 unlock:
         spin_unlock(&gr->ch_tlb_lock);
+        if (curr_tsgid)
+                *curr_tsgid = tsgid;
         return chid;
 }
 
@@ -5623,7 +5633,7 @@ int gk20a_gr_isr(struct gk20a *g)
                 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
 
                 isr_data.chid =
-                        gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
+                        gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL);
                 if (isr_data.chid == -1) {
                         gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
                                 isr_data.curr_ctx);
@@ -6847,6 +6857,31 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
         return -EINVAL;
 }
 
+bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
+{
+        int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid;
+        struct gk20a *g = ch->g;
+
+        curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
+        curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx,
+                                                  &curr_gr_tsgid);
+
+        gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+                  "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
+                  " ch->hw_chid=%d", curr_gr_chid,
+                  curr_gr_tsgid, ch->tsgid, ch->hw_chid);
+
+        if (curr_gr_chid == -1)
+                return false;
+
+        if (ch->hw_chid == curr_gr_chid)
+                return true;
+
+        if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
+                return true;
+
+        return false;
+}
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
                           struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
                           u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
@@ -6855,7 +6890,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
         struct gk20a *g = ch->g;
         struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
         void *ctx_ptr = NULL;
-        int curr_gr_chid, curr_gr_ctx;
         bool ch_is_curr_ctx, restart_gr_ctxsw = false;
         u32 i, j, offset, v;
         u32 max_offsets = proj_scal_litter_num_gpcs_v() *
@@ -6881,11 +6915,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
         restart_gr_ctxsw = true;
 
-        curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
-        curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx);
-        ch_is_curr_ctx = (curr_gr_chid != -1) && (ch->hw_chid == curr_gr_chid);
+        ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
 
         gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
+
         if (ch_is_curr_ctx) {
                 for (pass = 0; pass < 2; pass++) {
                         ctx_op_nr = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 7a4303f7..4b1f6de2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -93,6 +93,7 @@ enum {
 struct gr_channel_map_tlb_entry {
         u32 curr_ctx;
         u32 hw_chid;
+        u32 tsgid;
 };
 
 struct gr_zcull_gk20a {
-- 
cgit v1.2.2
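
A minimal standalone sketch of the residency rule this patch adds, for readers outside the driver tree. The types and names below (struct channel, INVALID_TSG_ID, channel_ctx_resident) are simplified stand-ins, not the real channel_gk20a/fifo_gk20a structures; only the decision logic mirrors gk20a_is_channel_ctx_resident(): a channel counts as resident when it owns the context currently loaded on GR, or when it belongs to the same TSG as the channel that does, since TSG members share one context image.

/* Sketch only: simplified stand-ins for the driver's channel bookkeeping. */
#include <stdbool.h>
#include <stdio.h>

#define INVALID_TSG_ID (-1)

struct channel {
        int hw_chid;    /* hardware channel id */
        int tsgid;      /* INVALID_TSG_ID if the channel is not in a TSG */
};

/*
 * curr_chid/curr_tsgid describe the context currently resident on GR,
 * i.e. the translation of gr_fecs_current_ctx_r(); curr_chid == -1 means
 * the current context could not be mapped to any channel.
 */
static bool channel_ctx_resident(const struct channel *ch,
                                 int curr_chid, int curr_tsgid)
{
        if (curr_chid == -1)
                return false;

        /* old check: exact channel match */
        if (ch->hw_chid == curr_chid)
                return true;

        /* new check: members of the resident TSG also count as resident */
        if (ch->tsgid != INVALID_TSG_ID && ch->tsgid == curr_tsgid)
                return true;

        return false;
}

int main(void)
{
        /* assume channel 5 is loaded on GR and belongs to TSG 2 */
        struct channel peer = { .hw_chid = 7, .tsgid = 2 };
        struct channel other = { .hw_chid = 9, .tsgid = INVALID_TSG_ID };

        printf("peer resident:  %d\n", channel_ctx_resident(&peer, 5, 2));  /* 1 */
        printf("other resident: %d\n", channel_ctx_resident(&other, 5, 2)); /* 0 */
        return 0;
}

Channel 7 reports resident even though channel 5 owns the loaded context, because both sit in TSG 2; that shared-context case is exactly what the old hw_chid-only comparison missed.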