From b9feba6efc48743da70e474d40b7889a7efb4ba5 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Tue, 21 Feb 2017 15:36:49 -0800 Subject: gpu: nvgpu: in-kernel kickoff profiling Add a debugfs interface to profile the kickoff ioctl. It provides the probability distribution and separates the time spent in the full ioctl, the kickoff function, job tracking, and pushbuffer copies. JIRA: EVLR-1003 Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1308997 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 68e43259..f58b208c 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, u32 flags, struct nvgpu_fence *fence, struct gk20a_fence **fence_out, - bool force_need_sync_fence) + bool force_need_sync_fence, + struct fifo_profile_gk20a *profile) { struct gk20a *g = c->g; struct device *d = dev_from_gk20a(g); @@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, return -EINVAL; } + if (profile) + profile->timestamp[PROFILE_ENTRY] = sched_clock(); + #ifdef CONFIG_DEBUG_FS /* update debug settings */ if (g->ops.ltc.sync_debugfs) @@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, goto clean_up_job; } + if (profile) + profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock(); + if (wait_cmd) gk20a_submit_append_priv_cmdbuf(c, wait_cmd); @@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, if (need_job_tracking) /* TODO! 
Check for errors... */ gk20a_channel_add_job(c, job, skip_buffer_refcounting); + if (profile) + profile->timestamp[PROFILE_APPEND] = sched_clock(); g->ops.fifo.userd_gp_put(g, c); @@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, gk20a_dbg_info("post-submit put %d, get %d, size %d", c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); + if (profile) + profile->timestamp[PROFILE_END] = sched_clock(); gk20a_dbg_fn("done"); return err; @@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo( struct nvgpu_submit_gpfifo_args *args) { struct gk20a_fence *fence_out; + struct fifo_profile_gk20a *profile = NULL; + int ret = 0; gk20a_dbg_fn(""); +#ifdef CONFIG_DEBUG_FS + profile = gk20a_fifo_profile_acquire(ch->g); + + if (profile) + profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock(); +#endif if (ch->has_timedout) return -ETIMEDOUT; - ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, args->flags, &args->fence, - &fence_out, false); + &fence_out, false, profile); if (ret) goto clean_up; @@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo( } } gk20a_fence_put(fence_out); - +#ifdef CONFIG_DEBUG_FS + if (profile) { + profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock(); + gk20a_fifo_profile_release(ch->g, profile); + } +#endif clean_up: return ret; } -- cgit v1.2.2