Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 414
1 file changed, 53 insertions(+), 361 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index ac3a3d57..46560a56 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -29,12 +29,11 @@
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/atomic.h>
-#include <nvgpu/sort.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a.h"
-#include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
 #include "mm_gk20a.h"
 
@@ -46,10 +45,6 @@
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
-#ifdef CONFIG_DEBUG_FS
-#include "platform_gk20a.h"
-#endif
-
 #define FECS_METHOD_WFI_RESTORE 0x80000
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -57,10 +52,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 		bool wait_for_finish);
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
 
-#ifdef CONFIG_DEBUG_FS
-static void __gk20a_fifo_profile_free(struct kref *ref);
-#endif
-
 u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
 		u32 engine_id[], u32 engine_id_sz,
 		u32 engine_enum)
@@ -562,14 +553,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	f->engine_info = NULL;
 	nvgpu_kfree(g, f->active_engines_list);
 	f->active_engines_list = NULL;
-#ifdef CONFIG_DEBUG_FS
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (f->profile.enabled) {
-		f->profile.enabled = false;
-		kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-#endif
 }
 
 /* reads info from hardware and fills in pbmda exception info record */
@@ -1543,7 +1526,7 @@ static bool gk20a_fifo_handle_mmu_fault(
 	} else {
 		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
 		fake_fault = false;
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 	}
 
 
@@ -1833,7 +1816,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 	gk20a_channel_abort(ch, false);
 
 	if (gk20a_fifo_error_ch(g, ch))
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 
 	gk20a_channel_put(ch);
 }
@@ -1860,7 +1843,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
 	struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
 
 	if (gk20a_fifo_error_tsg(g, tsg))
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 
 	gk20a_fifo_abort_tsg(g, tsgid, false);
 }
@@ -1957,7 +1940,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
 	unsigned int id_type;
 
 	if (verbose)
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 
 	if (g->ops.ltc.flush)
 		g->ops.ltc.flush(g);
@@ -3441,345 +3424,6 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 	return NULL;
 }
 
-#ifdef CONFIG_DEBUG_FS
-
-/* Get the next element in the ring buffer of profile entries
- * and grab a reference to the structure
- */
-struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
-{
-	struct fifo_gk20a *f = &g->fifo;
-	struct fifo_profile_gk20a *profile;
-	unsigned int index;
-
-	/* If kref is zero, profiling is not enabled */
-	if (!kref_get_unless_zero(&f->profile.ref))
-		return NULL;
-	index = atomic_inc_return(&f->profile.get);
-	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
-
-	return profile;
-}
-
-/* Free the reference to the structure. This allows deferred cleanups */
-void gk20a_fifo_profile_release(struct gk20a *g,
-	struct fifo_profile_gk20a *profile)
-{
-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-}
-
-static void *gk20a_fifo_sched_debugfs_seq_start(
-		struct seq_file *s, loff_t *pos)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-
-	if (*pos >= f->num_channels)
-		return NULL;
-
-	return &f->channel[*pos];
-}
-
-static void *gk20a_fifo_sched_debugfs_seq_next(
-		struct seq_file *s, void *v, loff_t *pos)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-
-	++(*pos);
-	if (*pos >= f->num_channels)
-		return NULL;
-
-	return &f->channel[*pos];
-}
-
-static void gk20a_fifo_sched_debugfs_seq_stop(
-		struct seq_file *s, void *v)
-{
-}
-
-static int gk20a_fifo_sched_debugfs_seq_show(
-		struct seq_file *s, void *v)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = v;
-	struct tsg_gk20a *tsg = NULL;
-
-	struct fifo_engine_info_gk20a *engine_info;
-	struct fifo_runlist_info_gk20a *runlist;
-	u32 runlist_id;
-	int ret = SEQ_SKIP;
-	u32 engine_id;
-
-	engine_id = gk20a_fifo_get_gr_engine_id(g);
-	engine_info = (f->engine_info + engine_id);
-	runlist_id = engine_info->runlist_id;
-	runlist = &f->runlist_info[runlist_id];
-
-	if (ch == f->channel) {
-		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
-		seq_puts(s, " (usecs) (msecs)\n");
-		ret = 0;
-	}
-
-	if (!test_bit(ch->hw_chid, runlist->active_channels))
-		return ret;
-
-	if (gk20a_channel_get(ch)) {
-		if (gk20a_is_channel_marked_as_tsg(ch))
-			tsg = &f->tsg[ch->tsgid];
-
-		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
-				ch->hw_chid,
-				ch->tsgid,
-				ch->tgid,
-				tsg ? tsg->timeslice_us : ch->timeslice_us,
-				ch->timeout_ms_max,
-				tsg ? tsg->interleave_level : ch->interleave_level,
-				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
-				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
-		gk20a_channel_put(ch);
-	}
-	return 0;
-}
-
-static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
-	.start = gk20a_fifo_sched_debugfs_seq_start,
-	.next = gk20a_fifo_sched_debugfs_seq_next,
-	.stop = gk20a_fifo_sched_debugfs_seq_stop,
-	.show = gk20a_fifo_sched_debugfs_seq_show
-};
-
-static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
-	struct file *file)
-{
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
-	if (err)
-		return err;
-
-	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
-
-	((struct seq_file *)file->private_data)->private = inode->i_private;
-	return 0;
-};
-
-/*
- * The file operations structure contains our open function along with
- * set of the canned seq_ ops.
- */
-static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
-	.owner = THIS_MODULE,
-	.open = gk20a_fifo_sched_debugfs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release
-};
-
-static void __gk20a_fifo_profile_free(struct kref *ref)
-{
-	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
-		profile.ref);
-	nvgpu_vfree(f->g, f->profile.data);
-	nvgpu_vfree(f->g, f->profile.sorted);
-}
-
-static int gk20a_fifo_profile_enable(void *data, u64 val)
-{
-	struct gk20a *g = (struct gk20a *) data;
-	struct fifo_gk20a *f = &g->fifo;
-
-
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (val == 0) {
-		if (f->profile.enabled) {
-			f->profile.enabled = false;
-			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-		}
-	} else {
-		if (!f->profile.enabled) {
-			/* not kref init as it can have a running condition if
-			 * we enable/disable/enable while kickoff is happening
-			 */
-			if (!kref_get_unless_zero(&f->profile.ref)) {
-				f->profile.data = vzalloc(
-					FIFO_PROFILING_ENTRIES *
-					sizeof(struct fifo_profile_gk20a));
-				f->profile.sorted = vzalloc(
-					FIFO_PROFILING_ENTRIES *
-					sizeof(u64));
-				if (!(f->profile.data && f->profile.sorted)) {
-					nvgpu_vfree(g, f->profile.data);
-					nvgpu_vfree(g, f->profile.sorted);
-					nvgpu_mutex_release(&f->profile.lock);
-					return -ENOMEM;
-				}
-				kref_init(&f->profile.ref);
-			}
-			atomic_set(&f->profile.get, 0);
-			f->profile.enabled = true;
-		}
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(
-	gk20a_fifo_profile_enable_debugfs_fops,
-	NULL,
-	gk20a_fifo_profile_enable,
-	"%llu\n"
-);
-
-static int __profile_cmp(const void *a, const void *b)
-{
-	return *((unsigned long long *) a) - *((unsigned long long *) b);
-}
-
-/*
- * This uses about 800b in the stack, but the function using it is not part
- * of a callstack where much memory is being used, so it is fine
- */
-#define PERCENTILE_WIDTH 5
-#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
-
-static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
-		u64 *percentiles, u32 index_end, u32 index_start)
-{
-	unsigned int nelem = 0;
-	unsigned int index;
-	struct fifo_profile_gk20a *profile;
-
-	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
-		profile = &g->fifo.profile.data[index];
-
-		if (profile->timestamp[index_end] >
-				profile->timestamp[index_start]) {
-			/* This is a valid element */
-			g->fifo.profile.sorted[nelem] =
-				profile->timestamp[index_end] -
-				profile->timestamp[index_start];
-			nelem++;
-		}
-	}
-
-	/* sort it */
-	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
-		__profile_cmp, NULL);
-
-	/* build ranges */
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		percentiles[index] =
-			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
-				nelem)/100 - 1];
-	return nelem;
-}
-
-static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
-{
-	struct gk20a *g = s->private;
-	unsigned int get, nelem, index;
-	/*
-	 * 800B in the stack, but function is declared statically and only
-	 * called from debugfs handler
-	 */
-	u64 percentiles_ioctl[PERCENTILE_RANGES];
-	u64 percentiles_kickoff[PERCENTILE_RANGES];
-	u64 percentiles_jobtracking[PERCENTILE_RANGES];
-	u64 percentiles_append[PERCENTILE_RANGES];
-	u64 percentiles_userd[PERCENTILE_RANGES];
-
-	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
-		seq_printf(s, "Profiling disabled\n");
-		return 0;
-	}
-
-	get = atomic_read(&g->fifo.profile.get);
-
-	__gk20a_fifo_create_stats(g, percentiles_ioctl,
-		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_kickoff,
-		PROFILE_END, PROFILE_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
-		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_append,
-		PROFILE_APPEND, PROFILE_JOB_TRACKING);
-	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
-		PROFILE_END, PROFILE_APPEND);
-
-	seq_printf(s, "Number of kickoffs: %d\n", nelem);
-	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
-
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
-			PERCENTILE_WIDTH * (index+1),
-			percentiles_ioctl[index],
-			percentiles_kickoff[index],
-			percentiles_append[index],
-			percentiles_jobtracking[index],
-			percentiles_userd[index]);
-
-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-
-	return 0;
-}
-
-static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
-}
-
-static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
-	.open = gk20a_fifo_profile_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-
-void gk20a_fifo_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	struct dentry *gpu_root = platform->debugfs;
-	struct dentry *fifo_root;
-	struct dentry *profile_root;
-
-
-	fifo_root = debugfs_create_dir("fifo", gpu_root);
-	if (IS_ERR_OR_NULL(fifo_root))
-		return;
-
-	gk20a_dbg(gpu_dbg_info, "g=%p", g);
-
-	debugfs_create_file("sched", 0600, fifo_root, g,
-		&gk20a_fifo_sched_debugfs_fops);
-
-	profile_root = debugfs_create_dir("profile", fifo_root);
-	if (IS_ERR_OR_NULL(profile_root))
-		return;
-
-	nvgpu_mutex_init(&g->fifo.profile.lock);
-	g->fifo.profile.enabled = false;
-	atomic_set(&g->fifo.profile.get, 0);
-	atomic_set(&g->fifo.profile.ref.refcount, 0);
-
-	debugfs_create_file("enable", 0600, profile_root, g,
-		&gk20a_fifo_profile_enable_debugfs_fops);
-
-	debugfs_create_file("stats", 0600, profile_root, g,
-		&gk20a_fifo_profile_stats_debugfs_fops);
-
-}
-#endif /* CONFIG_DEBUG_FS */
-
3783static const char * const ccsr_chan_status_str[] = { 3427static const char * const ccsr_chan_status_str[] = {
3784 "idle", 3428 "idle",
3785 "pending", 3429 "pending",
@@ -3901,6 +3545,54 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 	gk20a_debug_output(o, "\n");
 }
 
+void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
+		 struct gk20a_debug_output *o)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+	struct ch_state **ch_state;
+
+	ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
+	if (!ch_state) {
+		gk20a_debug_output(o, "cannot alloc memory for channels\n");
+		return;
+	}
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (gk20a_channel_get(ch)) {
+			ch_state[chid] =
+				nvgpu_kmalloc(g, sizeof(struct ch_state) +
+					ram_in_alloc_size_v());
+			/* ref taken stays to below loop with
+			 * successful allocs */
+			if (!ch_state[chid])
+				gk20a_channel_put(ch);
+		}
+	}
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (!ch_state[chid])
+			continue;
+
+		ch_state[chid]->pid = ch->pid;
+		ch_state[chid]->refs = atomic_read(&ch->ref_count);
+		nvgpu_mem_rd_n(g, &ch->inst_block, 0,
+			       &ch_state[chid]->inst_block[0],
+			       ram_in_alloc_size_v());
+		gk20a_channel_put(ch);
+	}
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (ch_state[chid]) {
+			g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
+				ch_state[chid]);
+			nvgpu_kfree(g, ch_state[chid]);
+		}
+	}
+	nvgpu_kfree(g, ch_state);
+}
+
 void gk20a_dump_pbdma_status(struct gk20a *g,
 		struct gk20a_debug_output *o)
 {
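
The gk20a_debug_dump_all_channel_status_ramfc() added in the last hunk follows a snapshot-then-dump pattern: pin each live channel and allocate in one pass, copy its volatile state in a second, and only then print from the stable copy, so the dump is not distorted by channels appearing or dying mid-iteration. Below is a minimal userspace sketch of the same three-pass shape; struct channel, struct snapshot, and the table here are illustrative stand-ins, not nvgpu types.

    #include <stdio.h>
    #include <stdlib.h>

    #define NUM_CHANNELS 8

    /* Stand-in for a live channel table that can change while we dump. */
    struct channel {
            int pid;
            int refs;
            int active;     /* non-zero if the channel is in use */
    };

    /* Per-channel state captured in passes 1 and 2, dumped in pass 3. */
    struct snapshot {
            int pid;
            int refs;
    };

    static struct channel channels[NUM_CHANNELS] = {
            [0] = { .pid = 100, .refs = 2, .active = 1 },
            [3] = { .pid = 230, .refs = 1, .active = 1 },
    };

    int main(void)
    {
            struct snapshot *snap[NUM_CHANNELS] = { 0 };
            int chid;

            /* Pass 1: pin each active channel and allocate its snapshot.
             * On allocation failure the slot stays NULL and the channel
             * is skipped (the driver drops its channel ref there). */
            for (chid = 0; chid < NUM_CHANNELS; chid++) {
                    if (!channels[chid].active)
                            continue;
                    snap[chid] = malloc(sizeof(*snap[chid]));
            }

            /* Pass 2: copy the volatile state into the snapshot. */
            for (chid = 0; chid < NUM_CHANNELS; chid++) {
                    if (!snap[chid])
                            continue;
                    snap[chid]->pid = channels[chid].pid;
                    snap[chid]->refs = channels[chid].refs;
            }

            /* Pass 3: print from the stable copy, then free it. */
            for (chid = 0; chid < NUM_CHANNELS; chid++) {
                    if (!snap[chid])
                            continue;
                    printf("chid %d: pid %d refs %d\n",
                           chid, snap[chid]->pid, snap[chid]->refs);
                    free(snap[chid]);
            }
            return 0;
    }

Keeping allocation and dumping in separate passes also means the print loop never takes a channel reference, so a wedged channel cannot stall the dump halfway through.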
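The profiler code removed by this patch gates every entry acquisition on kref_get_unless_zero(), so a sample slot can never be handed out once the disable path has dropped the base reference, and re-enabling can reuse a still-live kref rather than re-initializing it under a racing kickoff. A self-contained C11 sketch of that acquire-unless-dead loop follows; ref_get_unless_zero() and the refcount choreography are illustrative analogs, not the kernel's kref API.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Analog of kref_get_unless_zero(): take a reference only if at
     * least one is still held, so nothing is handed out mid-teardown. */
    static bool ref_get_unless_zero(atomic_int *ref)
    {
            int old = atomic_load(ref);

            while (old != 0) {
                    /* On failure, 'old' is reloaded with the current value. */
                    if (atomic_compare_exchange_weak(ref, &old, old + 1))
                            return true;
            }
            return false;
    }

    int main(void)
    {
            atomic_int ref;

            atomic_init(&ref, 1);   /* profiling enabled: one base reference */

            if (ref_get_unless_zero(&ref))
                    printf("acquired entry, refcount now %d\n",
                           atomic_load(&ref));

            atomic_fetch_sub(&ref, 1);      /* release the entry */
            atomic_fetch_sub(&ref, 1);      /* disable: drop the base ref */

            /* After teardown, acquisition must be refused. */
            printf("acquire after disable: %s\n",
                   ref_get_unless_zero(&ref) ? "yes (bug!)" : "refused");
            return 0;
    }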