summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
author David Nieto <dmartineznie@nvidia.com> 2017-02-21 18:36:49 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com> 2017-03-07 16:42:28 -0500
commit b9feba6efc48743da70e474d40b7889a7efb4ba5 (patch)
tree 668fed9a239d27dfc80abe525c43f6f864c90142 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent b9991767cca9e4166e83ab03a07bf79316cf749a (diff)
gpu: nvgpu: in-kernel kickoff profiling
Add a debugfs interface to profile the kickoff ioctl. It provides the probability distribution and separates the information between the time spent in: the full ioctl, the kickoff function, job tracking, and pushbuffer copies. JIRA: EVLR-1003 Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1308997 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c31
1 files changed, 27 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 68e43259..f58b208c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2987 u32 flags, 2987 u32 flags,
2988 struct nvgpu_fence *fence, 2988 struct nvgpu_fence *fence,
2989 struct gk20a_fence **fence_out, 2989 struct gk20a_fence **fence_out,
2990 bool force_need_sync_fence) 2990 bool force_need_sync_fence,
2991 struct fifo_profile_gk20a *profile)
2991{ 2992{
2992 struct gk20a *g = c->g; 2993 struct gk20a *g = c->g;
2993 struct device *d = dev_from_gk20a(g); 2994 struct device *d = dev_from_gk20a(g);
@@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3036 return -EINVAL; 3037 return -EINVAL;
3037 } 3038 }
3038 3039
3040 if (profile)
3041 profile->timestamp[PROFILE_ENTRY] = sched_clock();
3042
3039#ifdef CONFIG_DEBUG_FS 3043#ifdef CONFIG_DEBUG_FS
3040 /* update debug settings */ 3044 /* update debug settings */
3041 if (g->ops.ltc.sync_debugfs) 3045 if (g->ops.ltc.sync_debugfs)
@@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3162 goto clean_up_job; 3166 goto clean_up_job;
3163 } 3167 }
3164 3168
3169 if (profile)
3170 profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
3171
3165 if (wait_cmd) 3172 if (wait_cmd)
3166 gk20a_submit_append_priv_cmdbuf(c, wait_cmd); 3173 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
3167 3174
@@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3184 if (need_job_tracking) 3191 if (need_job_tracking)
3185 /* TODO! Check for errors... */ 3192 /* TODO! Check for errors... */
3186 gk20a_channel_add_job(c, job, skip_buffer_refcounting); 3193 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
3194 if (profile)
3195 profile->timestamp[PROFILE_APPEND] = sched_clock();
3187 3196
3188 g->ops.fifo.userd_gp_put(g, c); 3197 g->ops.fifo.userd_gp_put(g, c);
3189 3198
@@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3197 gk20a_dbg_info("post-submit put %d, get %d, size %d", 3206 gk20a_dbg_info("post-submit put %d, get %d, size %d",
3198 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); 3207 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
3199 3208
3209 if (profile)
3210 profile->timestamp[PROFILE_END] = sched_clock();
3200 gk20a_dbg_fn("done"); 3211 gk20a_dbg_fn("done");
3201 return err; 3212 return err;
3202 3213
@@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo(
3789 struct nvgpu_submit_gpfifo_args *args) 3800 struct nvgpu_submit_gpfifo_args *args)
3790{ 3801{
3791 struct gk20a_fence *fence_out; 3802 struct gk20a_fence *fence_out;
3803 struct fifo_profile_gk20a *profile = NULL;
3804
3792 int ret = 0; 3805 int ret = 0;
3793 gk20a_dbg_fn(""); 3806 gk20a_dbg_fn("");
3794 3807
3808#ifdef CONFIG_DEBUG_FS
3809 profile = gk20a_fifo_profile_acquire(ch->g);
3810
3811 if (profile)
3812 profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock();
3813#endif
3795 if (ch->has_timedout) 3814 if (ch->has_timedout)
3796 return -ETIMEDOUT; 3815 return -ETIMEDOUT;
3797
3798 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, 3816 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
3799 args->flags, &args->fence, 3817 args->flags, &args->fence,
3800 &fence_out, false); 3818 &fence_out, false, profile);
3801 3819
3802 if (ret) 3820 if (ret)
3803 goto clean_up; 3821 goto clean_up;
@@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
3816 } 3834 }
3817 } 3835 }
3818 gk20a_fence_put(fence_out); 3836 gk20a_fence_put(fence_out);
3819 3837#ifdef CONFIG_DEBUG_FS
3838 if (profile) {
3839 profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock();
3840 gk20a_fifo_profile_release(ch->g, profile);
3841 }
3842#endif
3820clean_up: 3843clean_up:
3821 return ret; 3844 return ret;
3822} 3845}