diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-02-21 18:36:49 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-07 16:42:28 -0500 |
commit | b9feba6efc48743da70e474d40b7889a7efb4ba5 (patch) | |
tree | 668fed9a239d27dfc80abe525c43f6f864c90142 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | b9991767cca9e4166e83ab03a07bf79316cf749a (diff) |
gpu: nvgpu: in-kernel kickoff profiling
Add a debugfs interface to profile the kickoff ioctl.
It provides the probability distribution and separates the information
into the time spent in: the full ioctl, the kickoff function, job
tracking, and pushbuffer copies.
JIRA: EVLR-1003
Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1308997
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 68e43259..f58b208c 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2987 | u32 flags, | 2987 | u32 flags, |
2988 | struct nvgpu_fence *fence, | 2988 | struct nvgpu_fence *fence, |
2989 | struct gk20a_fence **fence_out, | 2989 | struct gk20a_fence **fence_out, |
2990 | bool force_need_sync_fence) | 2990 | bool force_need_sync_fence, |
2991 | struct fifo_profile_gk20a *profile) | ||
2991 | { | 2992 | { |
2992 | struct gk20a *g = c->g; | 2993 | struct gk20a *g = c->g; |
2993 | struct device *d = dev_from_gk20a(g); | 2994 | struct device *d = dev_from_gk20a(g); |
@@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3036 | return -EINVAL; | 3037 | return -EINVAL; |
3037 | } | 3038 | } |
3038 | 3039 | ||
3040 | if (profile) | ||
3041 | profile->timestamp[PROFILE_ENTRY] = sched_clock(); | ||
3042 | |||
3039 | #ifdef CONFIG_DEBUG_FS | 3043 | #ifdef CONFIG_DEBUG_FS |
3040 | /* update debug settings */ | 3044 | /* update debug settings */ |
3041 | if (g->ops.ltc.sync_debugfs) | 3045 | if (g->ops.ltc.sync_debugfs) |
@@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3162 | goto clean_up_job; | 3166 | goto clean_up_job; |
3163 | } | 3167 | } |
3164 | 3168 | ||
3169 | if (profile) | ||
3170 | profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock(); | ||
3171 | |||
3165 | if (wait_cmd) | 3172 | if (wait_cmd) |
3166 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | 3173 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); |
3167 | 3174 | ||
@@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3184 | if (need_job_tracking) | 3191 | if (need_job_tracking) |
3185 | /* TODO! Check for errors... */ | 3192 | /* TODO! Check for errors... */ |
3186 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | 3193 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); |
3194 | if (profile) | ||
3195 | profile->timestamp[PROFILE_APPEND] = sched_clock(); | ||
3187 | 3196 | ||
3188 | g->ops.fifo.userd_gp_put(g, c); | 3197 | g->ops.fifo.userd_gp_put(g, c); |
3189 | 3198 | ||
@@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3197 | gk20a_dbg_info("post-submit put %d, get %d, size %d", | 3206 | gk20a_dbg_info("post-submit put %d, get %d, size %d", |
3198 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | 3207 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); |
3199 | 3208 | ||
3209 | if (profile) | ||
3210 | profile->timestamp[PROFILE_END] = sched_clock(); | ||
3200 | gk20a_dbg_fn("done"); | 3211 | gk20a_dbg_fn("done"); |
3201 | return err; | 3212 | return err; |
3202 | 3213 | ||
@@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
3789 | struct nvgpu_submit_gpfifo_args *args) | 3800 | struct nvgpu_submit_gpfifo_args *args) |
3790 | { | 3801 | { |
3791 | struct gk20a_fence *fence_out; | 3802 | struct gk20a_fence *fence_out; |
3803 | struct fifo_profile_gk20a *profile = NULL; | ||
3804 | |||
3792 | int ret = 0; | 3805 | int ret = 0; |
3793 | gk20a_dbg_fn(""); | 3806 | gk20a_dbg_fn(""); |
3794 | 3807 | ||
3808 | #ifdef CONFIG_DEBUG_FS | ||
3809 | profile = gk20a_fifo_profile_acquire(ch->g); | ||
3810 | |||
3811 | if (profile) | ||
3812 | profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock(); | ||
3813 | #endif | ||
3795 | if (ch->has_timedout) | 3814 | if (ch->has_timedout) |
3796 | return -ETIMEDOUT; | 3815 | return -ETIMEDOUT; |
3797 | |||
3798 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, | 3816 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, |
3799 | args->flags, &args->fence, | 3817 | args->flags, &args->fence, |
3800 | &fence_out, false); | 3818 | &fence_out, false, profile); |
3801 | 3819 | ||
3802 | if (ret) | 3820 | if (ret) |
3803 | goto clean_up; | 3821 | goto clean_up; |
@@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
3816 | } | 3834 | } |
3817 | } | 3835 | } |
3818 | gk20a_fence_put(fence_out); | 3836 | gk20a_fence_put(fence_out); |
3819 | 3837 | #ifdef CONFIG_DEBUG_FS | |
3838 | if (profile) { | ||
3839 | profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock(); | ||
3840 | gk20a_fifo_profile_release(ch->g, profile); | ||
3841 | } | ||
3842 | #endif | ||
3820 | clean_up: | 3843 | clean_up: |
3821 | return ret; | 3844 | return ret; |
3822 | } | 3845 | } |