diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-02-21 18:36:49 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-07 16:42:28 -0500 |
commit | b9feba6efc48743da70e474d40b7889a7efb4ba5 (patch) | |
tree | 668fed9a239d27dfc80abe525c43f6f864c90142 /drivers/gpu/nvgpu/gk20a | |
parent | b9991767cca9e4166e83ab03a07bf79316cf749a (diff) |
gpu: nvgpu: in-kernel kickoff profiling
Add a debugfs interface to profile the kickoff ioctl.
It provides the probability distribution of the latency and breaks the
information down into the time spent in: the full ioctl, the kickoff
function, job tracking, and pushbuffer copies.
JIRA: EVLR-1003
Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1308997
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 31 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 218 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 39 |
6 files changed, 289 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index d43bc93f..d19479a2 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -727,7 +727,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | |||
727 | } | 727 | } |
728 | 728 | ||
729 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, | 729 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, |
730 | num_entries, flags, fence, fence_out, true); | 730 | num_entries, flags, fence, fence_out, true, |
731 | NULL); | ||
731 | } | 732 | } |
732 | 733 | ||
733 | static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) | 734 | static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) |
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index fd248313..db1ac539 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -653,7 +653,7 @@ int gk20a_ce_execute_ops(struct device *dev, | |||
653 | 653 | ||
654 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | 654 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, |
655 | 1, submit_flags, &fence, | 655 | 1, submit_flags, &fence, |
656 | &ce_cmd_buf_fence_out, false); | 656 | &ce_cmd_buf_fence_out, false, NULL); |
657 | 657 | ||
658 | if (!ret) { | 658 | if (!ret) { |
659 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | 659 | memcpy((void *)(cmd_buf_cpu_va + fence_index), |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 68e43259..f58b208c 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2987 | u32 flags, | 2987 | u32 flags, |
2988 | struct nvgpu_fence *fence, | 2988 | struct nvgpu_fence *fence, |
2989 | struct gk20a_fence **fence_out, | 2989 | struct gk20a_fence **fence_out, |
2990 | bool force_need_sync_fence) | 2990 | bool force_need_sync_fence, |
2991 | struct fifo_profile_gk20a *profile) | ||
2991 | { | 2992 | { |
2992 | struct gk20a *g = c->g; | 2993 | struct gk20a *g = c->g; |
2993 | struct device *d = dev_from_gk20a(g); | 2994 | struct device *d = dev_from_gk20a(g); |
@@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3036 | return -EINVAL; | 3037 | return -EINVAL; |
3037 | } | 3038 | } |
3038 | 3039 | ||
3040 | if (profile) | ||
3041 | profile->timestamp[PROFILE_ENTRY] = sched_clock(); | ||
3042 | |||
3039 | #ifdef CONFIG_DEBUG_FS | 3043 | #ifdef CONFIG_DEBUG_FS |
3040 | /* update debug settings */ | 3044 | /* update debug settings */ |
3041 | if (g->ops.ltc.sync_debugfs) | 3045 | if (g->ops.ltc.sync_debugfs) |
@@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3162 | goto clean_up_job; | 3166 | goto clean_up_job; |
3163 | } | 3167 | } |
3164 | 3168 | ||
3169 | if (profile) | ||
3170 | profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock(); | ||
3171 | |||
3165 | if (wait_cmd) | 3172 | if (wait_cmd) |
3166 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | 3173 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); |
3167 | 3174 | ||
@@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3184 | if (need_job_tracking) | 3191 | if (need_job_tracking) |
3185 | /* TODO! Check for errors... */ | 3192 | /* TODO! Check for errors... */ |
3186 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | 3193 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); |
3194 | if (profile) | ||
3195 | profile->timestamp[PROFILE_APPEND] = sched_clock(); | ||
3187 | 3196 | ||
3188 | g->ops.fifo.userd_gp_put(g, c); | 3197 | g->ops.fifo.userd_gp_put(g, c); |
3189 | 3198 | ||
@@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
3197 | gk20a_dbg_info("post-submit put %d, get %d, size %d", | 3206 | gk20a_dbg_info("post-submit put %d, get %d, size %d", |
3198 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | 3207 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); |
3199 | 3208 | ||
3209 | if (profile) | ||
3210 | profile->timestamp[PROFILE_END] = sched_clock(); | ||
3200 | gk20a_dbg_fn("done"); | 3211 | gk20a_dbg_fn("done"); |
3201 | return err; | 3212 | return err; |
3202 | 3213 | ||
@@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
3789 | struct nvgpu_submit_gpfifo_args *args) | 3800 | struct nvgpu_submit_gpfifo_args *args) |
3790 | { | 3801 | { |
3791 | struct gk20a_fence *fence_out; | 3802 | struct gk20a_fence *fence_out; |
3803 | struct fifo_profile_gk20a *profile = NULL; | ||
3804 | |||
3792 | int ret = 0; | 3805 | int ret = 0; |
3793 | gk20a_dbg_fn(""); | 3806 | gk20a_dbg_fn(""); |
3794 | 3807 | ||
3808 | #ifdef CONFIG_DEBUG_FS | ||
3809 | profile = gk20a_fifo_profile_acquire(ch->g); | ||
3810 | |||
3811 | if (profile) | ||
3812 | profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock(); | ||
3813 | #endif | ||
3795 | if (ch->has_timedout) | 3814 | if (ch->has_timedout) |
3796 | return -ETIMEDOUT; | 3815 | return -ETIMEDOUT; |
3797 | |||
3798 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, | 3816 | ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, |
3799 | args->flags, &args->fence, | 3817 | args->flags, &args->fence, |
3800 | &fence_out, false); | 3818 | &fence_out, false, profile); |
3801 | 3819 | ||
3802 | if (ret) | 3820 | if (ret) |
3803 | goto clean_up; | 3821 | goto clean_up; |
@@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
3816 | } | 3834 | } |
3817 | } | 3835 | } |
3818 | gk20a_fence_put(fence_out); | 3836 | gk20a_fence_put(fence_out); |
3819 | 3837 | #ifdef CONFIG_DEBUG_FS | |
3838 | if (profile) { | ||
3839 | profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock(); | ||
3840 | gk20a_fifo_profile_release(ch->g, profile); | ||
3841 | } | ||
3842 | #endif | ||
3820 | clean_up: | 3843 | clean_up: |
3821 | return ret; | 3844 | return ret; |
3822 | } | 3845 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index d9913cd7..42550632 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -33,6 +33,7 @@ struct gk20a; | |||
33 | struct gr_gk20a; | 33 | struct gr_gk20a; |
34 | struct dbg_session_gk20a; | 34 | struct dbg_session_gk20a; |
35 | struct gk20a_fence; | 35 | struct gk20a_fence; |
36 | struct fifo_profile_gk20a; | ||
36 | 37 | ||
37 | #include "channel_sync_gk20a.h" | 38 | #include "channel_sync_gk20a.h" |
38 | 39 | ||
@@ -344,7 +345,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
344 | u32 flags, | 345 | u32 flags, |
345 | struct nvgpu_fence *fence, | 346 | struct nvgpu_fence *fence, |
346 | struct gk20a_fence **fence_out, | 347 | struct gk20a_fence **fence_out, |
347 | bool force_need_sync_fence); | 348 | bool force_need_sync_fence, |
349 | struct fifo_profile_gk20a *profile); | ||
348 | 350 | ||
349 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 351 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
350 | struct nvgpu_alloc_gpfifo_ex_args *args); | 352 | struct nvgpu_alloc_gpfifo_ex_args *args); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index d072fb48..35d56ce4 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <trace/events/gk20a.h> | 23 | #include <trace/events/gk20a.h> |
24 | #include <linux/dma-mapping.h> | 24 | #include <linux/dma-mapping.h> |
25 | #include <linux/nvhost.h> | 25 | #include <linux/nvhost.h> |
26 | #include <linux/sort.h> | ||
26 | 27 | ||
27 | #include <nvgpu/timers.h> | 28 | #include <nvgpu/timers.h> |
28 | #include <nvgpu/semaphore.h> | 29 | #include <nvgpu/semaphore.h> |
@@ -46,6 +47,10 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
46 | bool wait_for_finish); | 47 | bool wait_for_finish); |
47 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); | 48 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); |
48 | 49 | ||
50 | #ifdef CONFIG_DEBUG_FS | ||
51 | static void __gk20a_fifo_profile_free(struct kref *ref); | ||
52 | #endif | ||
53 | |||
49 | u32 gk20a_fifo_get_engine_ids(struct gk20a *g, | 54 | u32 gk20a_fifo_get_engine_ids(struct gk20a *g, |
50 | u32 engine_id[], u32 engine_id_sz, | 55 | u32 engine_id[], u32 engine_id_sz, |
51 | u32 engine_enum) | 56 | u32 engine_enum) |
@@ -532,6 +537,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) | |||
532 | f->engine_info = NULL; | 537 | f->engine_info = NULL; |
533 | kfree(f->active_engines_list); | 538 | kfree(f->active_engines_list); |
534 | f->active_engines_list = NULL; | 539 | f->active_engines_list = NULL; |
540 | #ifdef CONFIG_DEBUG_FS | ||
541 | nvgpu_mutex_acquire(&f->profile.lock); | ||
542 | if (f->profile.enabled) { | ||
543 | f->profile.enabled = false; | ||
544 | kref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
545 | } | ||
546 | nvgpu_mutex_release(&f->profile.lock); | ||
547 | #endif | ||
535 | } | 548 | } |
536 | 549 | ||
537 | /* reads info from hardware and fills in pbmda exception info record */ | 550 | /* reads info from hardware and fills in pbmda exception info record */ |
@@ -3203,6 +3216,32 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | |||
3203 | } | 3216 | } |
3204 | 3217 | ||
3205 | #ifdef CONFIG_DEBUG_FS | 3218 | #ifdef CONFIG_DEBUG_FS |
3219 | |||
3220 | /* Get the next element in the ring buffer of profile entries | ||
3221 | * and grab a reference to the structure | ||
3222 | */ | ||
3223 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) | ||
3224 | { | ||
3225 | struct fifo_gk20a *f = &g->fifo; | ||
3226 | struct fifo_profile_gk20a *profile; | ||
3227 | unsigned int index; | ||
3228 | |||
3229 | /* If kref is zero, profiling is not enabled */ | ||
3230 | if (!kref_get_unless_zero(&f->profile.ref)) | ||
3231 | return NULL; | ||
3232 | index = atomic_inc_return(&f->profile.get); | ||
3233 | profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; | ||
3234 | |||
3235 | return profile; | ||
3236 | } | ||
3237 | |||
3238 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
3239 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
3240 | struct fifo_profile_gk20a *profile) | ||
3241 | { | ||
3242 | kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
3243 | } | ||
3244 | |||
3206 | static void *gk20a_fifo_sched_debugfs_seq_start( | 3245 | static void *gk20a_fifo_sched_debugfs_seq_start( |
3207 | struct seq_file *s, loff_t *pos) | 3246 | struct seq_file *s, loff_t *pos) |
3208 | { | 3247 | { |
@@ -3316,6 +3355,168 @@ static const struct file_operations gk20a_fifo_sched_debugfs_fops = { | |||
3316 | .release = seq_release | 3355 | .release = seq_release |
3317 | }; | 3356 | }; |
3318 | 3357 | ||
3358 | static void __gk20a_fifo_profile_free(struct kref *ref) | ||
3359 | { | ||
3360 | struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, | ||
3361 | profile.ref); | ||
3362 | vfree(f->profile.data); | ||
3363 | vfree(f->profile.sorted); | ||
3364 | } | ||
3365 | |||
3366 | static int gk20a_fifo_profile_enable(void *data, u64 val) | ||
3367 | { | ||
3368 | struct gk20a *g = (struct gk20a *) data; | ||
3369 | struct fifo_gk20a *f = &g->fifo; | ||
3370 | |||
3371 | |||
3372 | nvgpu_mutex_acquire(&f->profile.lock); | ||
3373 | if (val == 0) { | ||
3374 | if (f->profile.enabled) { | ||
3375 | f->profile.enabled = false; | ||
3376 | kref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
3377 | } | ||
3378 | } else { | ||
3379 | if (!f->profile.enabled) { | ||
3380 | /* not kref init as it can have a race condition if | ||
3381 | * we enable/disable/enable while kickoff is happening | ||
3382 | */ | ||
3383 | if (!kref_get_unless_zero(&f->profile.ref)) { | ||
3384 | f->profile.data = vzalloc( | ||
3385 | FIFO_PROFILING_ENTRIES * | ||
3386 | sizeof(struct fifo_profile_gk20a)); | ||
3387 | f->profile.sorted = vzalloc( | ||
3388 | FIFO_PROFILING_ENTRIES * | ||
3389 | sizeof(u64)); | ||
3390 | if (!(f->profile.data && f->profile.sorted)) { | ||
3391 | vfree(f->profile.data); | ||
3392 | vfree(f->profile.sorted); | ||
3393 | nvgpu_mutex_release(&f->profile.lock); | ||
3394 | return -ENOMEM; | ||
3395 | } | ||
3396 | kref_init(&f->profile.ref); | ||
3397 | } | ||
3398 | atomic_set(&f->profile.get, 0); | ||
3399 | f->profile.enabled = true; | ||
3400 | } | ||
3401 | } | ||
3402 | nvgpu_mutex_release(&f->profile.lock); | ||
3403 | |||
3404 | return 0; | ||
3405 | } | ||
3406 | |||
3407 | DEFINE_SIMPLE_ATTRIBUTE( | ||
3408 | gk20a_fifo_profile_enable_debugfs_fops, | ||
3409 | NULL, | ||
3410 | gk20a_fifo_profile_enable, | ||
3411 | "%llu\n" | ||
3412 | ); | ||
3413 | |||
3414 | static int __profile_cmp(const void *a, const void *b) | ||
3415 | { | ||
3416 | return *((unsigned long long *) a) - *((unsigned long long *) b); | ||
3417 | } | ||
3418 | |||
3419 | /* | ||
3420 | * This uses about 800b in the stack, but the function using it is not part | ||
3421 | * of a callstack where much memory is being used, so it is fine | ||
3422 | */ | ||
3423 | #define PERCENTILE_WIDTH 5 | ||
3424 | #define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) | ||
3425 | |||
3426 | static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, | ||
3427 | u64 *percentiles, u32 index_end, u32 index_start) | ||
3428 | { | ||
3429 | unsigned int nelem = 0; | ||
3430 | unsigned int index; | ||
3431 | struct fifo_profile_gk20a *profile; | ||
3432 | |||
3433 | for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { | ||
3434 | profile = &g->fifo.profile.data[index]; | ||
3435 | |||
3436 | if (profile->timestamp[index_end] > | ||
3437 | profile->timestamp[index_start]) { | ||
3438 | /* This is a valid element */ | ||
3439 | g->fifo.profile.sorted[nelem] = | ||
3440 | profile->timestamp[index_end] - | ||
3441 | profile->timestamp[index_start]; | ||
3442 | nelem++; | ||
3443 | } | ||
3444 | } | ||
3445 | |||
3446 | /* sort it */ | ||
3447 | sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), | ||
3448 | __profile_cmp, NULL); | ||
3449 | |||
3450 | /* build ranges */ | ||
3451 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
3452 | percentiles[index] = | ||
3453 | g->fifo.profile.sorted[(PERCENTILE_WIDTH * index * | ||
3454 | nelem)/100]; | ||
3455 | return nelem; | ||
3456 | } | ||
3457 | |||
3458 | static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) | ||
3459 | { | ||
3460 | struct gk20a *g = s->private; | ||
3461 | unsigned int get, nelem, index; | ||
3462 | /* | ||
3463 | * 800B in the stack, but function is declared statically and only | ||
3464 | * called from debugfs handler | ||
3465 | */ | ||
3466 | u64 percentiles_ioctl[PERCENTILE_RANGES]; | ||
3467 | u64 percentiles_kickoff[PERCENTILE_RANGES]; | ||
3468 | u64 percentiles_jobtracking[PERCENTILE_RANGES]; | ||
3469 | u64 percentiles_append[PERCENTILE_RANGES]; | ||
3470 | u64 percentiles_userd[PERCENTILE_RANGES]; | ||
3471 | |||
3472 | if (!kref_get_unless_zero(&g->fifo.profile.ref)) { | ||
3473 | seq_printf(s, "Profiling disabled\n"); | ||
3474 | return 0; | ||
3475 | } | ||
3476 | |||
3477 | get = atomic_read(&g->fifo.profile.get); | ||
3478 | |||
3479 | __gk20a_fifo_create_stats(g, percentiles_ioctl, | ||
3480 | PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); | ||
3481 | __gk20a_fifo_create_stats(g, percentiles_kickoff, | ||
3482 | PROFILE_END, PROFILE_ENTRY); | ||
3483 | __gk20a_fifo_create_stats(g, percentiles_jobtracking, | ||
3484 | PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); | ||
3485 | __gk20a_fifo_create_stats(g, percentiles_append, | ||
3486 | PROFILE_APPEND, PROFILE_JOB_TRACKING); | ||
3487 | nelem = __gk20a_fifo_create_stats(g, percentiles_userd, | ||
3488 | PROFILE_END, PROFILE_APPEND); | ||
3489 | |||
3490 | seq_printf(s, "Number of kickoffs: %d\n", nelem); | ||
3491 | seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); | ||
3492 | |||
3493 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
3494 | seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", | ||
3495 | PERCENTILE_WIDTH * (index+1), | ||
3496 | percentiles_ioctl[index], | ||
3497 | percentiles_kickoff[index], | ||
3498 | percentiles_append[index], | ||
3499 | percentiles_jobtracking[index], | ||
3500 | percentiles_userd[index]); | ||
3501 | |||
3502 | kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
3503 | |||
3504 | return 0; | ||
3505 | } | ||
3506 | |||
3507 | static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) | ||
3508 | { | ||
3509 | return single_open(file, gk20a_fifo_profile_stats, inode->i_private); | ||
3510 | } | ||
3511 | |||
3512 | static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { | ||
3513 | .open = gk20a_fifo_profile_stats_open, | ||
3514 | .read = seq_read, | ||
3515 | .llseek = seq_lseek, | ||
3516 | .release = single_release, | ||
3517 | }; | ||
3518 | |||
3519 | |||
3319 | void gk20a_fifo_debugfs_init(struct device *dev) | 3520 | void gk20a_fifo_debugfs_init(struct device *dev) |
3320 | { | 3521 | { |
3321 | struct gk20a_platform *platform = dev_get_drvdata(dev); | 3522 | struct gk20a_platform *platform = dev_get_drvdata(dev); |
@@ -3323,6 +3524,8 @@ void gk20a_fifo_debugfs_init(struct device *dev) | |||
3323 | 3524 | ||
3324 | struct dentry *gpu_root = platform->debugfs; | 3525 | struct dentry *gpu_root = platform->debugfs; |
3325 | struct dentry *fifo_root; | 3526 | struct dentry *fifo_root; |
3527 | struct dentry *profile_root; | ||
3528 | |||
3326 | 3529 | ||
3327 | fifo_root = debugfs_create_dir("fifo", gpu_root); | 3530 | fifo_root = debugfs_create_dir("fifo", gpu_root); |
3328 | if (IS_ERR_OR_NULL(fifo_root)) | 3531 | if (IS_ERR_OR_NULL(fifo_root)) |
@@ -3333,6 +3536,21 @@ void gk20a_fifo_debugfs_init(struct device *dev) | |||
3333 | debugfs_create_file("sched", 0600, fifo_root, g, | 3536 | debugfs_create_file("sched", 0600, fifo_root, g, |
3334 | &gk20a_fifo_sched_debugfs_fops); | 3537 | &gk20a_fifo_sched_debugfs_fops); |
3335 | 3538 | ||
3539 | profile_root = debugfs_create_dir("profile", fifo_root); | ||
3540 | if (IS_ERR_OR_NULL(profile_root)) | ||
3541 | return; | ||
3542 | |||
3543 | nvgpu_mutex_init(&g->fifo.profile.lock); | ||
3544 | g->fifo.profile.enabled = false; | ||
3545 | atomic_set(&g->fifo.profile.get, 0); | ||
3546 | atomic_set(&g->fifo.profile.ref.refcount, 0); | ||
3547 | |||
3548 | debugfs_create_file("enable", 0600, profile_root, g, | ||
3549 | &gk20a_fifo_profile_enable_debugfs_fops); | ||
3550 | |||
3551 | debugfs_create_file("stats", 0600, profile_root, g, | ||
3552 | &gk20a_fifo_profile_stats_debugfs_fops); | ||
3553 | |||
3336 | } | 3554 | } |
3337 | #endif /* CONFIG_DEBUG_FS */ | 3555 | #endif /* CONFIG_DEBUG_FS */ |
3338 | 3556 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 147d1bea..75c801c6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -30,6 +30,15 @@ | |||
30 | #define FIFO_INVAL_CHANNEL_ID ((u32)~0) | 30 | #define FIFO_INVAL_CHANNEL_ID ((u32)~0) |
31 | #define FIFO_INVAL_TSG_ID ((u32)~0) | 31 | #define FIFO_INVAL_TSG_ID ((u32)~0) |
32 | 32 | ||
33 | /* | ||
34 | * Number of entries in the kickoff latency buffer, used to calculate | ||
35 | * the profiling and histogram. This number is calculated to be statistically | ||
36 | * significant on a histogram on a 5% step | ||
37 | */ | ||
38 | #ifdef CONFIG_DEBUG_FS | ||
39 | #define FIFO_PROFILING_ENTRIES 16384 | ||
40 | #endif | ||
41 | |||
33 | /* generally corresponds to the "pbdma" engine */ | 42 | /* generally corresponds to the "pbdma" engine */ |
34 | 43 | ||
35 | struct fifo_runlist_info_gk20a { | 44 | struct fifo_runlist_info_gk20a { |
@@ -99,6 +108,20 @@ struct fifo_engine_info_gk20a { | |||
99 | 108 | ||
100 | }; | 109 | }; |
101 | 110 | ||
111 | enum { | ||
112 | PROFILE_IOCTL_ENTRY = 0, | ||
113 | PROFILE_ENTRY, | ||
114 | PROFILE_JOB_TRACKING, | ||
115 | PROFILE_APPEND, | ||
116 | PROFILE_END, | ||
117 | PROFILE_IOCTL_EXIT, | ||
118 | PROFILE_MAX | ||
119 | }; | ||
120 | |||
121 | struct fifo_profile_gk20a { | ||
122 | u64 timestamp[PROFILE_MAX]; | ||
123 | }; | ||
124 | |||
102 | struct fifo_gk20a { | 125 | struct fifo_gk20a { |
103 | struct gk20a *g; | 126 | struct gk20a *g; |
104 | unsigned int num_channels; | 127 | unsigned int num_channels; |
@@ -115,7 +138,16 @@ struct fifo_gk20a { | |||
115 | 138 | ||
116 | struct fifo_runlist_info_gk20a *runlist_info; | 139 | struct fifo_runlist_info_gk20a *runlist_info; |
117 | u32 max_runlists; | 140 | u32 max_runlists; |
118 | 141 | #ifdef CONFIG_DEBUG_FS | |
142 | struct { | ||
143 | struct fifo_profile_gk20a *data; | ||
144 | atomic_t get; | ||
145 | bool enabled; | ||
146 | u64 *sorted; | ||
147 | struct kref ref; | ||
148 | struct nvgpu_mutex lock; | ||
149 | } profile; | ||
150 | #endif | ||
119 | struct mem_desc userd; | 151 | struct mem_desc userd; |
120 | u32 userd_entry_size; | 152 | u32 userd_entry_size; |
121 | 153 | ||
@@ -275,5 +307,10 @@ void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist); | |||
275 | u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); | 307 | u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); |
276 | void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); | 308 | void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); |
277 | bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid); | 309 | bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid); |
310 | #ifdef CONFIG_DEBUG_FS | ||
311 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g); | ||
312 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
313 | struct fifo_profile_gk20a *profile); | ||
314 | #endif | ||
278 | 315 | ||
279 | #endif /*__GR_GK20A_H__*/ | 316 | #endif /*__GR_GK20A_H__*/ |