From 3794afbeb177ed0932d166d30bb2af9d9859dff9 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 13 Nov 2018 15:36:19 +0200 Subject: gpu: nvgpu: add safe channel id lookup Add gk20a_channel_from_id() to retrieve a channel, given a raw channel ID, with a reference taken (or NULL if the channel was dead). This makes it harder to mistakenly use a channel that's dead and thus uncovers bugs sooner. Convert code to use the new lookup when applicable; work remains to convert complex uses where a ref should have been taken but hasn't. The channel ID is also validated against FIFO_INVAL_CHANNEL_ID; NULL is returned for such IDs. This is often useful and does not hurt when unnecessary. However, this does not prevent the case where a channel would be closed and reopened again when someone would hold a stale channel number. In all such conditions the caller should hold a reference already. The only conditions where a channel can be safely looked up by an id and used without taking a ref are when initializing or deinitializing the list of channels. Jira NVGPU-1460 Change-Id: I0a30968d17c1e0784d315a676bbe69c03a73481c Signed-off-by: Konsta Holtta Reviewed-on: https://git-master.nvidia.com/r/1955400 Signed-off-by: Debarshi Dutta (cherry picked from commit 7df3d587502c2de997dfbe8ea8ddc114d0a0481e in dev-kernel) Reviewed-on: https://git-master.nvidia.com/r/2008515 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel.c | 50 ++++++++++++++++++++----------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 18 +++++------ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 7 +++-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 9 +++--- drivers/gpu/nvgpu/include/nvgpu/channel.h | 5 ++++ drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 3 +- drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 4 +-- drivers/gpu/nvgpu/vgpu/vgpu.c | 2 +- 8 files changed, 59 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 54fa4747..45c02a75 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -626,6 +626,18 @@ void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); } +struct channel_gk20a *_gk20a_channel_from_id(struct gk20a *g, u32 chid, + const char *caller) +{ + nvgpu_log_fn(g, " "); + + if (chid == FIFO_INVAL_CHANNEL_ID) { + return NULL; + } + + return _gk20a_channel_get(&g->fifo.channel[chid], caller); +} + void gk20a_channel_close(struct channel_gk20a *ch) { gk20a_free_channel(ch, false); @@ -1450,11 +1462,10 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g) u32 chid; for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); - if (gk20a_channel_get(ch) == NULL) { + if (ch == NULL) continue; - } nvgpu_raw_spinlock_acquire(&ch->timeout.lock); if (ch->timeout.running) { @@ -1553,9 +1564,9 @@ static void gk20a_channel_poll_timeouts(struct gk20a *g) for (chid = 0; chid < g->fifo.num_channels; chid++) { - struct channel_gk20a *ch = &g->fifo.channel[chid]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); - if (gk20a_channel_get(ch)) { + if (ch != NULL) { gk20a_channel_timeout_check(ch); gk20a_channel_put(ch); } @@ -2107,9 +2118,9 @@ void gk20a_channel_deterministic_idle(struct gk20a *g) nvgpu_rwsem_down_write(&g->deterministic_busy); for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); - if (gk20a_channel_get(ch) == NULL) { + if (ch == NULL) { continue; } @@ -2145,9 +2156,9 @@ void gk20a_channel_deterministic_unidle(struct gk20a *g) u32 chid; for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); - if (gk20a_channel_get(ch) == NULL) { + if (ch == NULL) { continue; } @@ -2256,8 +2267,9 @@ int gk20a_channel_suspend(struct gk20a *g) nvgpu_log_fn(g, " "); for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (gk20a_channel_get(ch) != NULL) { + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); + + if (ch != NULL) { nvgpu_log_info(g, "suspend channel %d", chid); /* disable channel */ gk20a_disable_channel_tsg(g, ch); @@ -2280,9 +2292,11 @@ int gk20a_channel_suspend(struct gk20a *g) gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true); for (chid = 0; chid < f->num_channels; chid++) { - if (gk20a_channel_get(&f->channel[chid])) { - g->ops.fifo.unbind_channel(&f->channel[chid]); - gk20a_channel_put(&f->channel[chid]); + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); + + if (ch != NULL) { + g->ops.fifo.unbind_channel(ch); + gk20a_channel_put(ch); } } } @@ -2301,12 +2315,14 @@ int gk20a_channel_resume(struct gk20a *g) nvgpu_log_fn(g, " "); for (chid = 0; chid < f->num_channels; chid++) { - if (gk20a_channel_get(&f->channel[chid])) { + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); + + if (ch != NULL) { nvgpu_log_info(g, "resume channel %d", chid); - g->ops.fifo.bind_channel(&f->channel[chid]); + g->ops.fifo.bind_channel(ch); channels_in_use = true; active_runlist_ids |= BIT(f->channel[chid].runlist_id); - gk20a_channel_put(&f->channel[chid]); + gk20a_channel_put(ch); } } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 9691d51b..f4a4591d 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1172,7 +1172,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr) struct channel_gk20a *ch; u64 ch_inst_ptr; - ch = gk20a_channel_get(&f->channel[ci]); + ch = gk20a_channel_from_id(g, ci); /* only alive channels are searched */ if (!ch) { continue; @@ -1959,9 +1959,9 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 chid, bool verbose, int rc_type) gk20a_fifo_recover(g, engines, chid, false, true, verbose, rc_type); } else { - struct channel_gk20a *ch = &g->fifo.channel[chid]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); - if (gk20a_channel_get(ch)) { + if (ch != NULL) { gk20a_channel_abort(ch, false); if (gk20a_fifo_error_ch(g, ch)) { @@ -2710,9 +2710,9 @@ static void gk20a_fifo_pbdma_fault_rc(struct gk20a *g, id = fifo_pbdma_status_id_v(status); if (fifo_pbdma_status_id_type_v(status) == fifo_pbdma_status_id_type_chid_v()) { - struct channel_gk20a *ch = &f->channel[id]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, id); - if (gk20a_channel_get(ch)) { + if (ch != NULL) { g->ops.fifo.set_error_notifier(ch, error_notifier); gk20a_fifo_recover_ch(g, id, true, RC_TYPE_PBDMA_FAULT); gk20a_channel_put(ch); @@ -2924,12 +2924,12 @@ void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, gk20a_fifo_recover_tsg(g, id, true, RC_TYPE_PREEMPT_TIMEOUT); } else { - struct channel_gk20a *ch = &g->fifo.channel[id]; + struct channel_gk20a *ch = gk20a_channel_from_id(g, id); nvgpu_err(g, "preempt channel %d timeout", id); - if (gk20a_channel_get(ch)) { + if (ch != NULL) { g->ops.fifo.set_error_notifier(ch, NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT); gk20a_fifo_recover_ch(g, id, true, @@ -4031,8 +4031,8 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, } for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (gk20a_channel_get(ch)) { + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); + if (ch != NULL) { ch_state[chid] = nvgpu_kmalloc(g, sizeof(struct ch_state) + ram_in_alloc_size_v()); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 8cb66279..928d80cb 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -5581,15 +5581,16 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( if (gr->chid_tlb[i].curr_ctx == curr_ctx) { chid = gr->chid_tlb[i].chid; tsgid = gr->chid_tlb[i].tsgid; - ret = gk20a_channel_get(&f->channel[chid]); + ret = gk20a_channel_from_id(g, chid); goto unlock; } } /* slow path */ for (chid = 0; chid < f->num_channels; chid++) { - struct channel_gk20a *ch = &f->channel[chid]; - if (gk20a_channel_get(ch) == NULL) { + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); + + if (ch == NULL) { continue; } diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index f70a5a00..a3655146 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -1961,8 +1961,8 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) chid = g->gr.cilp_preempt_pending_chid; - ch = gk20a_channel_get(gk20a_fifo_channel_from_chid(g, chid)); - if (!ch) { + ch = gk20a_channel_from_id(g, chid); + if (ch == NULL) { return ret; } @@ -2014,9 +2014,8 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g, goto clean_up; } - ch = gk20a_channel_get( - gk20a_fifo_channel_from_chid(g, chid)); - if (!ch) { + ch = gk20a_channel_from_id(g, chid); + if (ch == NULL) { goto clean_up; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 6cdcb973..1851b9e2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -393,6 +393,11 @@ struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch, void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller); #define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__) +/* returns NULL if could not take a ref to the channel */ +struct channel_gk20a *__must_check _gk20a_channel_from_id(struct gk20a *g, + u32 chid, const char *caller); +#define gk20a_channel_from_id(g, chid) _gk20a_channel_from_id(g, chid, __func__) + int gk20a_wait_channel_idle(struct channel_gk20a *ch); /* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */ diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index aa5abec9..8821e799 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -721,8 +721,7 @@ static void vgpu_fifo_set_ctx_mmu_error_ch_tsg(struct gk20a *g, int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info) { - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]); + struct channel_gk20a *ch = gk20a_channel_from_id(g, info->chid); nvgpu_log_fn(g, " "); if (!ch) diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 9fafa52f..6a86c9a0 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -953,10 +953,10 @@ int vgpu_init_gr_support(struct gk20a *g) int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) { - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = gk20a_channel_get(&f->channel[info->chid]); + struct channel_gk20a *ch = gk20a_channel_from_id(g, info->chid); nvgpu_log_fn(g, " "); + if (!ch) return 0; diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 266b801e..07361afe 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -119,7 +119,7 @@ static void vgpu_handle_channel_event(struct gk20a *g, static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid) { - struct channel_gk20a *ch = gk20a_channel_get(&g->fifo.channel[chid]); + struct channel_gk20a *ch = gk20a_channel_from_id(g, chid); if (ch == NULL) { nvgpu_err(g, "invalid channel id %d", chid); -- cgit v1.2.2