diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index d072fb48..35d56ce4 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <trace/events/gk20a.h> | 23 | #include <trace/events/gk20a.h> |
24 | #include <linux/dma-mapping.h> | 24 | #include <linux/dma-mapping.h> |
25 | #include <linux/nvhost.h> | 25 | #include <linux/nvhost.h> |
26 | #include <linux/sort.h> | ||
26 | 27 | ||
27 | #include <nvgpu/timers.h> | 28 | #include <nvgpu/timers.h> |
28 | #include <nvgpu/semaphore.h> | 29 | #include <nvgpu/semaphore.h> |
@@ -46,6 +47,10 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
46 | bool wait_for_finish); | 47 | bool wait_for_finish); |
47 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); | 48 | static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg); |
48 | 49 | ||
50 | #ifdef CONFIG_DEBUG_FS | ||
51 | static void __gk20a_fifo_profile_free(struct kref *ref); | ||
52 | #endif | ||
53 | |||
49 | u32 gk20a_fifo_get_engine_ids(struct gk20a *g, | 54 | u32 gk20a_fifo_get_engine_ids(struct gk20a *g, |
50 | u32 engine_id[], u32 engine_id_sz, | 55 | u32 engine_id[], u32 engine_id_sz, |
51 | u32 engine_enum) | 56 | u32 engine_enum) |
@@ -532,6 +537,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) | |||
532 | f->engine_info = NULL; | 537 | f->engine_info = NULL; |
533 | kfree(f->active_engines_list); | 538 | kfree(f->active_engines_list); |
534 | f->active_engines_list = NULL; | 539 | f->active_engines_list = NULL; |
540 | #ifdef CONFIG_DEBUG_FS | ||
541 | nvgpu_mutex_acquire(&f->profile.lock); | ||
542 | if (f->profile.enabled) { | ||
543 | f->profile.enabled = false; | ||
544 | kref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
545 | } | ||
546 | nvgpu_mutex_release(&f->profile.lock); | ||
547 | #endif | ||
535 | } | 548 | } |
536 | 549 | ||
537 | /* reads info from hardware and fills in pbmda exception info record */ | 550 | /* reads info from hardware and fills in pbmda exception info record */ |
@@ -3203,6 +3216,32 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, | |||
3203 | } | 3216 | } |
3204 | 3217 | ||
3205 | #ifdef CONFIG_DEBUG_FS | 3218 | #ifdef CONFIG_DEBUG_FS |
3219 | |||
3220 | /* Get the next element in the ring buffer of profile entries | ||
3221 | * and grab a reference to the structure | ||
3222 | */ | ||
3223 | struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) | ||
3224 | { | ||
3225 | struct fifo_gk20a *f = &g->fifo; | ||
3226 | struct fifo_profile_gk20a *profile; | ||
3227 | unsigned int index; | ||
3228 | |||
3229 | /* If kref is zero, profiling is not enabled */ | ||
3230 | if (!kref_get_unless_zero(&f->profile.ref)) | ||
3231 | return NULL; | ||
3232 | index = atomic_inc_return(&f->profile.get); | ||
3233 | profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; | ||
3234 | |||
3235 | return profile; | ||
3236 | } | ||
3237 | |||
3238 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
3239 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
3240 | struct fifo_profile_gk20a *profile) | ||
3241 | { | ||
3242 | kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
3243 | } | ||
3244 | |||
3206 | static void *gk20a_fifo_sched_debugfs_seq_start( | 3245 | static void *gk20a_fifo_sched_debugfs_seq_start( |
3207 | struct seq_file *s, loff_t *pos) | 3246 | struct seq_file *s, loff_t *pos) |
3208 | { | 3247 | { |
@@ -3316,6 +3355,168 @@ static const struct file_operations gk20a_fifo_sched_debugfs_fops = { | |||
3316 | .release = seq_release | 3355 | .release = seq_release |
3317 | }; | 3356 | }; |
3318 | 3357 | ||
3358 | static void __gk20a_fifo_profile_free(struct kref *ref) | ||
3359 | { | ||
3360 | struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a, | ||
3361 | profile.ref); | ||
3362 | vfree(f->profile.data); | ||
3363 | vfree(f->profile.sorted); | ||
3364 | } | ||
3365 | |||
3366 | static int gk20a_fifo_profile_enable(void *data, u64 val) | ||
3367 | { | ||
3368 | struct gk20a *g = (struct gk20a *) data; | ||
3369 | struct fifo_gk20a *f = &g->fifo; | ||
3370 | |||
3371 | |||
3372 | nvgpu_mutex_acquire(&f->profile.lock); | ||
3373 | if (val == 0) { | ||
3374 | if (f->profile.enabled) { | ||
3375 | f->profile.enabled = false; | ||
3376 | kref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
3377 | } | ||
3378 | } else { | ||
3379 | if (!f->profile.enabled) { | ||
3380 | /* not kref init as it can have a running condition if | ||
3381 | * we enable/disable/enable while kickoff is happening | ||
3382 | */ | ||
3383 | if (!kref_get_unless_zero(&f->profile.ref)) { | ||
3384 | f->profile.data = vzalloc( | ||
3385 | FIFO_PROFILING_ENTRIES * | ||
3386 | sizeof(struct fifo_profile_gk20a)); | ||
3387 | f->profile.sorted = vzalloc( | ||
3388 | FIFO_PROFILING_ENTRIES * | ||
3389 | sizeof(u64)); | ||
3390 | if (!(f->profile.data && f->profile.sorted)) { | ||
3391 | vfree(f->profile.data); | ||
3392 | vfree(f->profile.sorted); | ||
3393 | nvgpu_mutex_release(&f->profile.lock); | ||
3394 | return -ENOMEM; | ||
3395 | } | ||
3396 | kref_init(&f->profile.ref); | ||
3397 | } | ||
3398 | atomic_set(&f->profile.get, 0); | ||
3399 | f->profile.enabled = true; | ||
3400 | } | ||
3401 | } | ||
3402 | nvgpu_mutex_release(&f->profile.lock); | ||
3403 | |||
3404 | return 0; | ||
3405 | } | ||
3406 | |||
3407 | DEFINE_SIMPLE_ATTRIBUTE( | ||
3408 | gk20a_fifo_profile_enable_debugfs_fops, | ||
3409 | NULL, | ||
3410 | gk20a_fifo_profile_enable, | ||
3411 | "%llu\n" | ||
3412 | ); | ||
3413 | |||
/*
 * Comparator for sort(): ascending order of u64 sample durations.
 *
 * Returning the raw difference truncated to int is wrong for 64-bit
 * values: a difference >= 2^32 can truncate to 0 (reported "equal") or
 * flip sign, corrupting the sort order. Compare explicitly instead.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long va = *(const unsigned long long *) a;
	unsigned long long vb = *(const unsigned long long *) b;

	return (va > vb) - (va < vb);
}
3418 | |||
3419 | /* | ||
3420 | * This uses about 800b in the stack, but the function using it is not part | ||
3421 | * of a callstack where much memory is being used, so it is fine | ||
3422 | */ | ||
3423 | #define PERCENTILE_WIDTH 5 | ||
3424 | #define PERCENTILE_RANGES (100/PERCENTILE_WIDTH) | ||
3425 | |||
3426 | static unsigned int __gk20a_fifo_create_stats(struct gk20a *g, | ||
3427 | u64 *percentiles, u32 index_end, u32 index_start) | ||
3428 | { | ||
3429 | unsigned int nelem = 0; | ||
3430 | unsigned int index; | ||
3431 | struct fifo_profile_gk20a *profile; | ||
3432 | |||
3433 | for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) { | ||
3434 | profile = &g->fifo.profile.data[index]; | ||
3435 | |||
3436 | if (profile->timestamp[index_end] > | ||
3437 | profile->timestamp[index_start]) { | ||
3438 | /* This is a valid element */ | ||
3439 | g->fifo.profile.sorted[nelem] = | ||
3440 | profile->timestamp[index_end] - | ||
3441 | profile->timestamp[index_start]; | ||
3442 | nelem++; | ||
3443 | } | ||
3444 | } | ||
3445 | |||
3446 | /* sort it */ | ||
3447 | sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long), | ||
3448 | __profile_cmp, NULL); | ||
3449 | |||
3450 | /* build ranges */ | ||
3451 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
3452 | percentiles[index] = | ||
3453 | g->fifo.profile.sorted[(PERCENTILE_WIDTH * index * | ||
3454 | nelem)/100]; | ||
3455 | return nelem; | ||
3456 | } | ||
3457 | |||
3458 | static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) | ||
3459 | { | ||
3460 | struct gk20a *g = s->private; | ||
3461 | unsigned int get, nelem, index; | ||
3462 | /* | ||
3463 | * 800B in the stack, but function is declared statically and only | ||
3464 | * called from debugfs handler | ||
3465 | */ | ||
3466 | u64 percentiles_ioctl[PERCENTILE_RANGES]; | ||
3467 | u64 percentiles_kickoff[PERCENTILE_RANGES]; | ||
3468 | u64 percentiles_jobtracking[PERCENTILE_RANGES]; | ||
3469 | u64 percentiles_append[PERCENTILE_RANGES]; | ||
3470 | u64 percentiles_userd[PERCENTILE_RANGES]; | ||
3471 | |||
3472 | if (!kref_get_unless_zero(&g->fifo.profile.ref)) { | ||
3473 | seq_printf(s, "Profiling disabled\n"); | ||
3474 | return 0; | ||
3475 | } | ||
3476 | |||
3477 | get = atomic_read(&g->fifo.profile.get); | ||
3478 | |||
3479 | __gk20a_fifo_create_stats(g, percentiles_ioctl, | ||
3480 | PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); | ||
3481 | __gk20a_fifo_create_stats(g, percentiles_kickoff, | ||
3482 | PROFILE_END, PROFILE_ENTRY); | ||
3483 | __gk20a_fifo_create_stats(g, percentiles_jobtracking, | ||
3484 | PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); | ||
3485 | __gk20a_fifo_create_stats(g, percentiles_append, | ||
3486 | PROFILE_APPEND, PROFILE_JOB_TRACKING); | ||
3487 | nelem = __gk20a_fifo_create_stats(g, percentiles_userd, | ||
3488 | PROFILE_END, PROFILE_APPEND); | ||
3489 | |||
3490 | seq_printf(s, "Number of kickoffs: %d\n", nelem); | ||
3491 | seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); | ||
3492 | |||
3493 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
3494 | seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", | ||
3495 | PERCENTILE_WIDTH * (index+1), | ||
3496 | percentiles_ioctl[index], | ||
3497 | percentiles_kickoff[index], | ||
3498 | percentiles_append[index], | ||
3499 | percentiles_jobtracking[index], | ||
3500 | percentiles_userd[index]); | ||
3501 | |||
3502 | kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
3503 | |||
3504 | return 0; | ||
3505 | } | ||
3506 | |||
3507 | static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) | ||
3508 | { | ||
3509 | return single_open(file, gk20a_fifo_profile_stats, inode->i_private); | ||
3510 | } | ||
3511 | |||
3512 | static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = { | ||
3513 | .open = gk20a_fifo_profile_stats_open, | ||
3514 | .read = seq_read, | ||
3515 | .llseek = seq_lseek, | ||
3516 | .release = single_release, | ||
3517 | }; | ||
3518 | |||
3519 | |||
3319 | void gk20a_fifo_debugfs_init(struct device *dev) | 3520 | void gk20a_fifo_debugfs_init(struct device *dev) |
3320 | { | 3521 | { |
3321 | struct gk20a_platform *platform = dev_get_drvdata(dev); | 3522 | struct gk20a_platform *platform = dev_get_drvdata(dev); |
@@ -3323,6 +3524,8 @@ void gk20a_fifo_debugfs_init(struct device *dev) | |||
3323 | 3524 | ||
3324 | struct dentry *gpu_root = platform->debugfs; | 3525 | struct dentry *gpu_root = platform->debugfs; |
3325 | struct dentry *fifo_root; | 3526 | struct dentry *fifo_root; |
3527 | struct dentry *profile_root; | ||
3528 | |||
3326 | 3529 | ||
3327 | fifo_root = debugfs_create_dir("fifo", gpu_root); | 3530 | fifo_root = debugfs_create_dir("fifo", gpu_root); |
3328 | if (IS_ERR_OR_NULL(fifo_root)) | 3531 | if (IS_ERR_OR_NULL(fifo_root)) |
@@ -3333,6 +3536,21 @@ void gk20a_fifo_debugfs_init(struct device *dev) | |||
3333 | debugfs_create_file("sched", 0600, fifo_root, g, | 3536 | debugfs_create_file("sched", 0600, fifo_root, g, |
3334 | &gk20a_fifo_sched_debugfs_fops); | 3537 | &gk20a_fifo_sched_debugfs_fops); |
3335 | 3538 | ||
3539 | profile_root = debugfs_create_dir("profile", fifo_root); | ||
3540 | if (IS_ERR_OR_NULL(profile_root)) | ||
3541 | return; | ||
3542 | |||
3543 | nvgpu_mutex_init(&g->fifo.profile.lock); | ||
3544 | g->fifo.profile.enabled = false; | ||
3545 | atomic_set(&g->fifo.profile.get, 0); | ||
3546 | atomic_set(&g->fifo.profile.ref.refcount, 0); | ||
3547 | |||
3548 | debugfs_create_file("enable", 0600, profile_root, g, | ||
3549 | &gk20a_fifo_profile_enable_debugfs_fops); | ||
3550 | |||
3551 | debugfs_create_file("stats", 0600, profile_root, g, | ||
3552 | &gk20a_fifo_profile_stats_debugfs_fops); | ||
3553 | |||
3336 | } | 3554 | } |
3337 | #endif /* CONFIG_DEBUG_FS */ | 3555 | #endif /* CONFIG_DEBUG_FS */ |
3338 | 3556 | ||