summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
authorDavid Nieto <dmartineznie@nvidia.com>2017-02-21 18:36:49 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-03-07 16:42:28 -0500
commitb9feba6efc48743da70e474d40b7889a7efb4ba5 (patch)
tree668fed9a239d27dfc80abe525c43f6f864c90142 /drivers
parentb9991767cca9e4166e83ab03a07bf79316cf749a (diff)
gpu: nvgpu: in-kernel kickoff profiling
Add a debugfs interface to profile the kickoff ioctl. It provides the probability distribution and separates the information between time spent in: the full ioctl, the kickoff function, the amount of time spent in job tracking, and the amount of time doing pushbuffer copies. JIRA: EVLR-1003 Change-Id: I9888b114c3fbced61b1cf134c79f7a8afce15f56 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1308997 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/ce2_gk20a.c2
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c31
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c218
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.h39
6 files changed, 289 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index d43bc93f..d19479a2 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -727,7 +727,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
727 } 727 }
728 728
729 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, 729 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
730 num_entries, flags, fence, fence_out, true); 730 num_entries, flags, fence, fence_out, true,
731 NULL);
731} 732}
732 733
733static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) 734static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index fd248313..db1ac539 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -653,7 +653,7 @@ int gk20a_ce_execute_ops(struct device *dev,
653 653
654 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, 654 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
655 1, submit_flags, &fence, 655 1, submit_flags, &fence,
656 &ce_cmd_buf_fence_out, false); 656 &ce_cmd_buf_fence_out, false, NULL);
657 657
658 if (!ret) { 658 if (!ret) {
659 memcpy((void *)(cmd_buf_cpu_va + fence_index), 659 memcpy((void *)(cmd_buf_cpu_va + fence_index),
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 68e43259..f58b208c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2987,7 +2987,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2987 u32 flags, 2987 u32 flags,
2988 struct nvgpu_fence *fence, 2988 struct nvgpu_fence *fence,
2989 struct gk20a_fence **fence_out, 2989 struct gk20a_fence **fence_out,
2990 bool force_need_sync_fence) 2990 bool force_need_sync_fence,
2991 struct fifo_profile_gk20a *profile)
2991{ 2992{
2992 struct gk20a *g = c->g; 2993 struct gk20a *g = c->g;
2993 struct device *d = dev_from_gk20a(g); 2994 struct device *d = dev_from_gk20a(g);
@@ -3036,6 +3037,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3036 return -EINVAL; 3037 return -EINVAL;
3037 } 3038 }
3038 3039
3040 if (profile)
3041 profile->timestamp[PROFILE_ENTRY] = sched_clock();
3042
3039#ifdef CONFIG_DEBUG_FS 3043#ifdef CONFIG_DEBUG_FS
3040 /* update debug settings */ 3044 /* update debug settings */
3041 if (g->ops.ltc.sync_debugfs) 3045 if (g->ops.ltc.sync_debugfs)
@@ -3162,6 +3166,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3162 goto clean_up_job; 3166 goto clean_up_job;
3163 } 3167 }
3164 3168
3169 if (profile)
3170 profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
3171
3165 if (wait_cmd) 3172 if (wait_cmd)
3166 gk20a_submit_append_priv_cmdbuf(c, wait_cmd); 3173 gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
3167 3174
@@ -3184,6 +3191,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3184 if (need_job_tracking) 3191 if (need_job_tracking)
3185 /* TODO! Check for errors... */ 3192 /* TODO! Check for errors... */
3186 gk20a_channel_add_job(c, job, skip_buffer_refcounting); 3193 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
3194 if (profile)
3195 profile->timestamp[PROFILE_APPEND] = sched_clock();
3187 3196
3188 g->ops.fifo.userd_gp_put(g, c); 3197 g->ops.fifo.userd_gp_put(g, c);
3189 3198
@@ -3197,6 +3206,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
3197 gk20a_dbg_info("post-submit put %d, get %d, size %d", 3206 gk20a_dbg_info("post-submit put %d, get %d, size %d",
3198 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); 3207 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
3199 3208
3209 if (profile)
3210 profile->timestamp[PROFILE_END] = sched_clock();
3200 gk20a_dbg_fn("done"); 3211 gk20a_dbg_fn("done");
3201 return err; 3212 return err;
3202 3213
@@ -3789,15 +3800,22 @@ static int gk20a_ioctl_channel_submit_gpfifo(
3789 struct nvgpu_submit_gpfifo_args *args) 3800 struct nvgpu_submit_gpfifo_args *args)
3790{ 3801{
3791 struct gk20a_fence *fence_out; 3802 struct gk20a_fence *fence_out;
3803 struct fifo_profile_gk20a *profile = NULL;
3804
3792 int ret = 0; 3805 int ret = 0;
3793 gk20a_dbg_fn(""); 3806 gk20a_dbg_fn("");
3794 3807
3808#ifdef CONFIG_DEBUG_FS
3809 profile = gk20a_fifo_profile_acquire(ch->g);
3810
3811 if (profile)
3812 profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock();
3813#endif
3795 if (ch->has_timedout) 3814 if (ch->has_timedout)
3796 return -ETIMEDOUT; 3815 return -ETIMEDOUT;
3797
3798 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries, 3816 ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
3799 args->flags, &args->fence, 3817 args->flags, &args->fence,
3800 &fence_out, false); 3818 &fence_out, false, profile);
3801 3819
3802 if (ret) 3820 if (ret)
3803 goto clean_up; 3821 goto clean_up;
@@ -3816,7 +3834,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
3816 } 3834 }
3817 } 3835 }
3818 gk20a_fence_put(fence_out); 3836 gk20a_fence_put(fence_out);
3819 3837#ifdef CONFIG_DEBUG_FS
3838 if (profile) {
3839 profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock();
3840 gk20a_fifo_profile_release(ch->g, profile);
3841 }
3842#endif
3820clean_up: 3843clean_up:
3821 return ret; 3844 return ret;
3822} 3845}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d9913cd7..42550632 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -33,6 +33,7 @@ struct gk20a;
33struct gr_gk20a; 33struct gr_gk20a;
34struct dbg_session_gk20a; 34struct dbg_session_gk20a;
35struct gk20a_fence; 35struct gk20a_fence;
36struct fifo_profile_gk20a;
36 37
37#include "channel_sync_gk20a.h" 38#include "channel_sync_gk20a.h"
38 39
@@ -344,7 +345,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
344 u32 flags, 345 u32 flags,
345 struct nvgpu_fence *fence, 346 struct nvgpu_fence *fence,
346 struct gk20a_fence **fence_out, 347 struct gk20a_fence **fence_out,
347 bool force_need_sync_fence); 348 bool force_need_sync_fence,
349 struct fifo_profile_gk20a *profile);
348 350
349int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, 351int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
350 struct nvgpu_alloc_gpfifo_ex_args *args); 352 struct nvgpu_alloc_gpfifo_ex_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index d072fb48..35d56ce4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -23,6 +23,7 @@
23#include <trace/events/gk20a.h> 23#include <trace/events/gk20a.h>
24#include <linux/dma-mapping.h> 24#include <linux/dma-mapping.h>
25#include <linux/nvhost.h> 25#include <linux/nvhost.h>
26#include <linux/sort.h>
26 27
27#include <nvgpu/timers.h> 28#include <nvgpu/timers.h>
28#include <nvgpu/semaphore.h> 29#include <nvgpu/semaphore.h>
@@ -46,6 +47,10 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
46 bool wait_for_finish); 47 bool wait_for_finish);
47static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); 48static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
48 49
50#ifdef CONFIG_DEBUG_FS
51static void __gk20a_fifo_profile_free(struct kref *ref);
52#endif
53
49u32 gk20a_fifo_get_engine_ids(struct gk20a *g, 54u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
50 u32 engine_id[], u32 engine_id_sz, 55 u32 engine_id[], u32 engine_id_sz,
51 u32 engine_enum) 56 u32 engine_enum)
@@ -532,6 +537,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
532 f->engine_info = NULL; 537 f->engine_info = NULL;
533 kfree(f->active_engines_list); 538 kfree(f->active_engines_list);
534 f->active_engines_list = NULL; 539 f->active_engines_list = NULL;
540#ifdef CONFIG_DEBUG_FS
541 nvgpu_mutex_acquire(&f->profile.lock);
542 if (f->profile.enabled) {
543 f->profile.enabled = false;
544 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
545 }
546 nvgpu_mutex_release(&f->profile.lock);
547#endif
535} 548}
536 549
537/* reads info from hardware and fills in pbmda exception info record */ 550/* reads info from hardware and fills in pbmda exception info record */
@@ -3203,6 +3216,32 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
3203} 3216}
3204 3217
3205#ifdef CONFIG_DEBUG_FS 3218#ifdef CONFIG_DEBUG_FS
3219
3220/* Get the next element in the ring buffer of profile entries
3221 * and grab a reference to the structure
3222 */
3223struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
3224{
3225 struct fifo_gk20a *f = &g->fifo;
3226 struct fifo_profile_gk20a *profile;
3227 unsigned int index;
3228
3229 /* If kref is zero, profiling is not enabled */
3230 if (!kref_get_unless_zero(&f->profile.ref))
3231 return NULL;
3232 index = atomic_inc_return(&f->profile.get);
3233 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
3234
3235 return profile;
3236}
3237
3238/* Free the reference to the structure. This allows deferred cleanups */
3239void gk20a_fifo_profile_release(struct gk20a *g,
3240 struct fifo_profile_gk20a *profile)
3241{
3242 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
3243}
3244
3206static void *gk20a_fifo_sched_debugfs_seq_start( 3245static void *gk20a_fifo_sched_debugfs_seq_start(
3207 struct seq_file *s, loff_t *pos) 3246 struct seq_file *s, loff_t *pos)
3208{ 3247{
@@ -3316,6 +3355,168 @@ static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
3316 .release = seq_release 3355 .release = seq_release
3317}; 3356};
3318 3357
3358static void __gk20a_fifo_profile_free(struct kref *ref)
3359{
3360 struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
3361 profile.ref);
3362 vfree(f->profile.data);
3363 vfree(f->profile.sorted);
3364}
3365
3366static int gk20a_fifo_profile_enable(void *data, u64 val)
3367{
3368 struct gk20a *g = (struct gk20a *) data;
3369 struct fifo_gk20a *f = &g->fifo;
3370
3371
3372 nvgpu_mutex_acquire(&f->profile.lock);
3373 if (val == 0) {
3374 if (f->profile.enabled) {
3375 f->profile.enabled = false;
3376 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
3377 }
3378 } else {
3379 if (!f->profile.enabled) {
3380 /* not kref init as it can have a running condition if
3381 * we enable/disable/enable while kickoff is happening
3382 */
3383 if (!kref_get_unless_zero(&f->profile.ref)) {
3384 f->profile.data = vzalloc(
3385 FIFO_PROFILING_ENTRIES *
3386 sizeof(struct fifo_profile_gk20a));
3387 f->profile.sorted = vzalloc(
3388 FIFO_PROFILING_ENTRIES *
3389 sizeof(u64));
3390 if (!(f->profile.data && f->profile.sorted)) {
3391 vfree(f->profile.data);
3392 vfree(f->profile.sorted);
3393 nvgpu_mutex_release(&f->profile.lock);
3394 return -ENOMEM;
3395 }
3396 kref_init(&f->profile.ref);
3397 }
3398 atomic_set(&f->profile.get, 0);
3399 f->profile.enabled = true;
3400 }
3401 }
3402 nvgpu_mutex_release(&f->profile.lock);
3403
3404 return 0;
3405}
3406
3407DEFINE_SIMPLE_ATTRIBUTE(
3408 gk20a_fifo_profile_enable_debugfs_fops,
3409 NULL,
3410 gk20a_fifo_profile_enable,
3411 "%llu\n"
3412);
3413
3414static int __profile_cmp(const void *a, const void *b)
3415{
3416 return *((unsigned long long *) a) - *((unsigned long long *) b);
3417}
3418
3419/*
3420 * This uses about 800b in the stack, but the function using it is not part
3421 * of a callstack where much memory is being used, so it is fine
3422 */
3423#define PERCENTILE_WIDTH 5
3424#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
3425
3426static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
3427 u64 *percentiles, u32 index_end, u32 index_start)
3428{
3429 unsigned int nelem = 0;
3430 unsigned int index;
3431 struct fifo_profile_gk20a *profile;
3432
3433 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
3434 profile = &g->fifo.profile.data[index];
3435
3436 if (profile->timestamp[index_end] >
3437 profile->timestamp[index_start]) {
3438 /* This is a valid element */
3439 g->fifo.profile.sorted[nelem] =
3440 profile->timestamp[index_end] -
3441 profile->timestamp[index_start];
3442 nelem++;
3443 }
3444 }
3445
3446 /* sort it */
3447 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
3448 __profile_cmp, NULL);
3449
3450 /* build ranges */
3451 for (index = 0; index < PERCENTILE_RANGES; index++)
3452 percentiles[index] =
3453 g->fifo.profile.sorted[(PERCENTILE_WIDTH * index *
3454 nelem)/100];
3455 return nelem;
3456}
3457
3458static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
3459{
3460 struct gk20a *g = s->private;
3461 unsigned int get, nelem, index;
3462 /*
3463 * 800B in the stack, but function is declared statically and only
3464 * called from debugfs handler
3465 */
3466 u64 percentiles_ioctl[PERCENTILE_RANGES];
3467 u64 percentiles_kickoff[PERCENTILE_RANGES];
3468 u64 percentiles_jobtracking[PERCENTILE_RANGES];
3469 u64 percentiles_append[PERCENTILE_RANGES];
3470 u64 percentiles_userd[PERCENTILE_RANGES];
3471
3472 if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
3473 seq_printf(s, "Profiling disabled\n");
3474 return 0;
3475 }
3476
3477 get = atomic_read(&g->fifo.profile.get);
3478
3479 __gk20a_fifo_create_stats(g, percentiles_ioctl,
3480 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
3481 __gk20a_fifo_create_stats(g, percentiles_kickoff,
3482 PROFILE_END, PROFILE_ENTRY);
3483 __gk20a_fifo_create_stats(g, percentiles_jobtracking,
3484 PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
3485 __gk20a_fifo_create_stats(g, percentiles_append,
3486 PROFILE_APPEND, PROFILE_JOB_TRACKING);
3487 nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
3488 PROFILE_END, PROFILE_APPEND);
3489
3490 seq_printf(s, "Number of kickoffs: %d\n", nelem);
3491 seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
3492
3493 for (index = 0; index < PERCENTILE_RANGES; index++)
3494 seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
3495 PERCENTILE_WIDTH * (index+1),
3496 percentiles_ioctl[index],
3497 percentiles_kickoff[index],
3498 percentiles_append[index],
3499 percentiles_jobtracking[index],
3500 percentiles_userd[index]);
3501
3502 kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
3503
3504 return 0;
3505}
3506
3507static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
3508{
3509 return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
3510}
3511
3512static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
3513 .open = gk20a_fifo_profile_stats_open,
3514 .read = seq_read,
3515 .llseek = seq_lseek,
3516 .release = single_release,
3517};
3518
3519
3319void gk20a_fifo_debugfs_init(struct device *dev) 3520void gk20a_fifo_debugfs_init(struct device *dev)
3320{ 3521{
3321 struct gk20a_platform *platform = dev_get_drvdata(dev); 3522 struct gk20a_platform *platform = dev_get_drvdata(dev);
@@ -3323,6 +3524,8 @@ void gk20a_fifo_debugfs_init(struct device *dev)
3323 3524
3324 struct dentry *gpu_root = platform->debugfs; 3525 struct dentry *gpu_root = platform->debugfs;
3325 struct dentry *fifo_root; 3526 struct dentry *fifo_root;
3527 struct dentry *profile_root;
3528
3326 3529
3327 fifo_root = debugfs_create_dir("fifo", gpu_root); 3530 fifo_root = debugfs_create_dir("fifo", gpu_root);
3328 if (IS_ERR_OR_NULL(fifo_root)) 3531 if (IS_ERR_OR_NULL(fifo_root))
@@ -3333,6 +3536,21 @@ void gk20a_fifo_debugfs_init(struct device *dev)
3333 debugfs_create_file("sched", 0600, fifo_root, g, 3536 debugfs_create_file("sched", 0600, fifo_root, g,
3334 &gk20a_fifo_sched_debugfs_fops); 3537 &gk20a_fifo_sched_debugfs_fops);
3335 3538
3539 profile_root = debugfs_create_dir("profile", fifo_root);
3540 if (IS_ERR_OR_NULL(profile_root))
3541 return;
3542
3543 nvgpu_mutex_init(&g->fifo.profile.lock);
3544 g->fifo.profile.enabled = false;
3545 atomic_set(&g->fifo.profile.get, 0);
3546 atomic_set(&g->fifo.profile.ref.refcount, 0);
3547
3548 debugfs_create_file("enable", 0600, profile_root, g,
3549 &gk20a_fifo_profile_enable_debugfs_fops);
3550
3551 debugfs_create_file("stats", 0600, profile_root, g,
3552 &gk20a_fifo_profile_stats_debugfs_fops);
3553
3336} 3554}
3337#endif /* CONFIG_DEBUG_FS */ 3555#endif /* CONFIG_DEBUG_FS */
3338 3556
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 147d1bea..75c801c6 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -30,6 +30,15 @@
30#define FIFO_INVAL_CHANNEL_ID ((u32)~0) 30#define FIFO_INVAL_CHANNEL_ID ((u32)~0)
31#define FIFO_INVAL_TSG_ID ((u32)~0) 31#define FIFO_INVAL_TSG_ID ((u32)~0)
32 32
33/*
34 * Number of entries in the kickoff latency buffer, used to calculate
35 * the profiling and histogram. This number is calculated to be statistically
36 * significative on a histogram on a 5% step
37 */
38#ifdef CONFIG_DEBUG_FS
39#define FIFO_PROFILING_ENTRIES 16384
40#endif
41
33/* generally corresponds to the "pbdma" engine */ 42/* generally corresponds to the "pbdma" engine */
34 43
35struct fifo_runlist_info_gk20a { 44struct fifo_runlist_info_gk20a {
@@ -99,6 +108,20 @@ struct fifo_engine_info_gk20a {
99 108
100}; 109};
101 110
111enum {
112 PROFILE_IOCTL_ENTRY = 0,
113 PROFILE_ENTRY,
114 PROFILE_JOB_TRACKING,
115 PROFILE_APPEND,
116 PROFILE_END,
117 PROFILE_IOCTL_EXIT,
118 PROFILE_MAX
119};
120
121struct fifo_profile_gk20a {
122 u64 timestamp[PROFILE_MAX];
123};
124
102struct fifo_gk20a { 125struct fifo_gk20a {
103 struct gk20a *g; 126 struct gk20a *g;
104 unsigned int num_channels; 127 unsigned int num_channels;
@@ -115,7 +138,16 @@ struct fifo_gk20a {
115 138
116 struct fifo_runlist_info_gk20a *runlist_info; 139 struct fifo_runlist_info_gk20a *runlist_info;
117 u32 max_runlists; 140 u32 max_runlists;
118 141#ifdef CONFIG_DEBUG_FS
142 struct {
143 struct fifo_profile_gk20a *data;
144 atomic_t get;
145 bool enabled;
146 u64 *sorted;
147 struct kref ref;
148 struct nvgpu_mutex lock;
149 } profile;
150#endif
119 struct mem_desc userd; 151 struct mem_desc userd;
120 u32 userd_entry_size; 152 u32 userd_entry_size;
121 153
@@ -275,5 +307,10 @@ void gk20a_get_ch_runlist_entry(struct channel_gk20a *ch, u32 *runlist);
275u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c); 307u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
276void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c); 308void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
277bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid); 309bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
310#ifdef CONFIG_DEBUG_FS
311struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
312void gk20a_fifo_profile_release(struct gk20a *g,
313 struct fifo_profile_gk20a *profile);
314#endif
278 315
279#endif /*__GR_GK20A_H__*/ 316#endif /*__GR_GK20A_H__*/