-rw-r--r--   drivers/gpu/nvgpu/gk20a/cde_gk20a.c           |   4
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c       | 302
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |  32
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  |  58
-rw-r--r--   drivers/gpu/nvgpu/gk20a/debug_gk20a.c         |  34
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          | 247
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.h          |  15
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.c               |   3
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h               |   9
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |  93
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mc_gk20a.c            |  18
-rw-r--r--   drivers/gpu/nvgpu/vgpu/fifo_vgpu.c            |   4
-rw-r--r--   include/trace/events/gk20a.h                  |  50
13 files changed, 681 insertions, 188 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 4a3076b5..b4fdfb44 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Color decompression engine support 2 * Color decompression engine support
3 * 3 *
4 * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. 4 * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -74,7 +74,7 @@ __must_hold(&cde_app->mutex)
74 trace_gk20a_cde_remove_ctx(cde_ctx); 74 trace_gk20a_cde_remove_ctx(cde_ctx);
75 75
76 /* free the channel */ 76 /* free the channel */
77 gk20a_free_channel(cde_ctx->ch, true); 77 gk20a_channel_close(ch);
78 78
79 /* ..then release mapped memory */ 79 /* ..then release mapped memory */
80 gk20a_deinit_cde_img(cde_ctx); 80 gk20a_deinit_cde_img(cde_ctx);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c12f196d..5a71e874 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -42,8 +42,8 @@
42 42
43#define NVMAP_HANDLE_PARAM_SIZE 1 43#define NVMAP_HANDLE_PARAM_SIZE 1
44 44
45static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f); 45static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
46static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c); 46static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
47 47
48static void free_priv_cmdbuf(struct channel_gk20a *c, 48static void free_priv_cmdbuf(struct channel_gk20a *c,
49 struct priv_cmd_entry *e); 49 struct priv_cmd_entry *e);
@@ -61,29 +61,33 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c,
61 bool add); 61 bool add);
62static void gk20a_free_error_notifiers(struct channel_gk20a *ch); 62static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
63 63
64static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f) 64/* allocate GPU channel */
65static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
65{ 66{
66 struct channel_gk20a *ch = NULL; 67 struct channel_gk20a *ch = NULL;
67 int chid;
68 68
69 mutex_lock(&f->ch_inuse_mutex); 69 mutex_lock(&f->free_chs_mutex);
70 for (chid = 0; chid < f->num_channels; chid++) { 70 if (!list_empty(&f->free_chs)) {
71 if (!f->channel[chid].in_use) { 71 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
72 f->channel[chid].in_use = true; 72 free_chs);
73 ch = &f->channel[chid]; 73 list_del(&ch->free_chs);
74 break; 74 WARN_ON(atomic_read(&ch->ref_count));
75 } 75 WARN_ON(ch->referenceable);
76 } 76 }
77 mutex_unlock(&f->ch_inuse_mutex); 77 mutex_unlock(&f->free_chs_mutex);
78 78
79 return ch; 79 return ch;
80} 80}
81 81
82static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c) 82static void free_channel(struct fifo_gk20a *f,
83 struct channel_gk20a *ch)
83{ 84{
84 mutex_lock(&f->ch_inuse_mutex); 85 trace_gk20a_release_used_channel(ch->hw_chid);
85 f->channel[c->hw_chid].in_use = false; 86 /* refcount is zero here and channel is in a freed/dead state */
86 mutex_unlock(&f->ch_inuse_mutex); 87 mutex_lock(&f->free_chs_mutex);
88 /* add to head to increase visibility of timing-related bugs */
89 list_add(&ch->free_chs, &f->free_chs);
90 mutex_unlock(&f->free_chs_mutex);
87} 91}
88 92
89int channel_gk20a_commit_va(struct channel_gk20a *c) 93int channel_gk20a_commit_va(struct channel_gk20a *c)
@@ -361,6 +365,11 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
361 struct channel_gk20a_job *job, *n; 365 struct channel_gk20a_job *job, *n;
362 bool released_job_semaphore = false; 366 bool released_job_semaphore = false;
363 367
368 gk20a_dbg_fn("");
369
370 /* make sure new kickoffs are prevented */
371 ch->has_timedout = true;
372
364 /* ensure no fences are pending */ 373 /* ensure no fences are pending */
365 mutex_lock(&ch->submit_lock); 374 mutex_lock(&ch->submit_lock);
366 if (ch->sync) 375 if (ch->sync)
@@ -416,6 +425,8 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
416 bool finish, 425 bool finish,
417 unsigned long finish_timeout) 426 unsigned long finish_timeout)
418{ 427{
428 gk20a_dbg_fn("");
429
419 if (finish) { 430 if (finish) {
420 int err = gk20a_channel_finish(ch, finish_timeout); 431 int err = gk20a_channel_finish(ch, finish_timeout);
421 WARN_ON(err); 432 WARN_ON(err);
@@ -627,8 +638,9 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
627 (u32)(nsec >> 32); 638 (u32)(nsec >> 32);
628 ch->error_notifier->info32 = error; 639 ch->error_notifier->info32 = error;
629 ch->error_notifier->status = 0xffff; 640 ch->error_notifier->status = 0xffff;
641
630 gk20a_err(dev_from_gk20a(ch->g), 642 gk20a_err(dev_from_gk20a(ch->g),
631 "error notifier set to %d for ch %d\n", error, ch->hw_chid); 643 "error notifier set to %d for ch %d", error, ch->hw_chid);
632 } 644 }
633} 645}
634 646
@@ -643,7 +655,53 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
643 } 655 }
644} 656}
645 657
646void gk20a_free_channel(struct channel_gk20a *ch, bool finish) 658/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
659 * is positive */
660static int cyclic_delta(int a, int b)
661{
662 return a - b;
663}
664
665static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
666{
667 int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
668 int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
669
670 /* wait until all stalling irqs are handled */
671 wait_event(g->sw_irq_stall_last_handled_wq,
672 cyclic_delta(stall_irq_threshold,
673 atomic_read(&g->sw_irq_stall_last_handled))
674 <= 0);
675
676 /* wait until all non-stalling irqs are handled */
677 wait_event(g->sw_irq_nonstall_last_handled_wq,
678 cyclic_delta(nonstall_irq_threshold,
679 atomic_read(&g->sw_irq_nonstall_last_handled))
680 <= 0);
681}
682
683static void gk20a_wait_until_counter_is_N(
684 struct channel_gk20a *ch, atomic_t *counter, int wait_value,
685 wait_queue_head_t *wq, const char *caller, const char *counter_name)
686{
687 while (true) {
688 if (wait_event_timeout(
689 *wq,
690 atomic_read(counter) == wait_value,
691 msecs_to_jiffies(5000)) > 0)
692 break;
693
694 gk20a_warn(dev_from_gk20a(ch->g),
695 "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
696 caller, ch->hw_chid, counter_name,
697 atomic_read(counter), wait_value);
698 }
699}
700
701
702
703/* call ONLY when no references to the channel exist: after the last put */
704static void gk20a_free_channel(struct channel_gk20a *ch)
647{ 705{
648 struct gk20a *g = ch->g; 706 struct gk20a *g = ch->g;
649 struct fifo_gk20a *f = &g->fifo; 707 struct fifo_gk20a *f = &g->fifo;
@@ -654,13 +712,50 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
654 712
655 gk20a_dbg_fn(""); 713 gk20a_dbg_fn("");
656 714
715 WARN_ON(ch->g == NULL);
716
717 trace_gk20a_free_channel(ch->hw_chid);
718
719 /* prevent new kickoffs */
720 ch->has_timedout = true;
721 wmb();
722
723 /* wait until there's only our ref to the channel */
724 gk20a_wait_until_counter_is_N(
725 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
726 __func__, "references");
727
728 /* wait until all pending interrupts for recently completed
729 * jobs are handled */
730 gk20a_wait_for_deferred_interrupts(g);
731
732 /* prevent new refs */
733 spin_lock(&ch->ref_obtain_lock);
734 if (!ch->referenceable) {
735 spin_unlock(&ch->ref_obtain_lock);
736 gk20a_err(dev_from_gk20a(ch->g),
737 "Extra %s() called to channel %u",
738 __func__, ch->hw_chid);
739 return;
740 }
741 ch->referenceable = false;
742 spin_unlock(&ch->ref_obtain_lock);
743
744 /* matches with the initial reference in gk20a_open_new_channel() */
745 atomic_dec(&ch->ref_count);
746
747 /* wait until no more refs to the channel */
748 gk20a_wait_until_counter_is_N(
749 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
750 __func__, "references");
751
657 /* if engine reset was deferred, perform it now */ 752 /* if engine reset was deferred, perform it now */
658 mutex_lock(&f->deferred_reset_mutex); 753 mutex_lock(&f->deferred_reset_mutex);
659 if (g->fifo.deferred_reset_pending) { 754 if (g->fifo.deferred_reset_pending) {
660 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" 755 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
661 " deferred, running now"); 756 " deferred, running now");
662 gk20a_fifo_reset_engine(g, g->fifo.mmu_fault_engines); 757 gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
663 g->fifo.mmu_fault_engines = 0; 758 g->fifo.deferred_fault_engines = 0;
664 g->fifo.deferred_reset_pending = false; 759 g->fifo.deferred_reset_pending = false;
665 } 760 }
666 mutex_unlock(&f->deferred_reset_mutex); 761 mutex_unlock(&f->deferred_reset_mutex);
@@ -674,7 +769,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
674 gk20a_dbg_info("freeing bound channel context, timeout=%ld", 769 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
675 timeout); 770 timeout);
676 771
677 gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout); 772 gk20a_disable_channel(ch, !ch->has_timedout, timeout);
678 773
679 gk20a_free_error_notifiers(ch); 774 gk20a_free_error_notifiers(ch);
680 775
@@ -714,6 +809,10 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
714 spin_unlock(&ch->update_fn_lock); 809 spin_unlock(&ch->update_fn_lock);
715 cancel_work_sync(&ch->update_fn_work); 810 cancel_work_sync(&ch->update_fn_work);
716 811
812 /* make sure we don't have deferred interrupts pending that
813 * could still touch the channel */
814 gk20a_wait_for_deferred_interrupts(g);
815
717unbind: 816unbind:
718 if (gk20a_is_channel_marked_as_tsg(ch)) 817 if (gk20a_is_channel_marked_as_tsg(ch))
719 gk20a_tsg_unbind_channel(ch); 818 gk20a_tsg_unbind_channel(ch);
@@ -743,8 +842,66 @@ unbind:
743 mutex_unlock(&ch->dbg_s_lock); 842 mutex_unlock(&ch->dbg_s_lock);
744 843
745release: 844release:
845 /* make sure we catch accesses of unopened channels in case
846 * there's non-refcounted channel pointers hanging around */
847 ch->g = NULL;
848 wmb();
849
746 /* ALWAYS last */ 850 /* ALWAYS last */
747 release_used_channel(f, ch); 851 free_channel(f, ch);
852}
853
854/* Try to get a reference to the channel. Return nonzero on success. If fails,
855 * the channel is dead or being freed elsewhere and you must not touch it.
856 *
857 * Always when a channel_gk20a pointer is seen and about to be used, a
858 * reference must be held to it - either by you or the caller, which should be
859 * documented well or otherwise clearly seen. This usually boils down to the
860 * file from ioctls directly, or an explicit get in exception handlers when the
861 * channel is found by a hw_chid.
862 *
863 * Most global functions in this file require a reference to be held by the
864 * caller.
865 */
866struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
867 const char *caller) {
868 struct channel_gk20a *ret;
869
870 spin_lock(&ch->ref_obtain_lock);
871
872 if (likely(ch->referenceable)) {
873 atomic_inc(&ch->ref_count);
874 ret = ch;
875 } else
876 ret = NULL;
877
878 spin_unlock(&ch->ref_obtain_lock);
879
880 if (ret)
881 trace_gk20a_channel_get(ch->hw_chid, caller);
882
883 return ret;
884}
885
886void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
887{
888 trace_gk20a_channel_put(ch->hw_chid, caller);
889 atomic_dec(&ch->ref_count);
890 wake_up_all(&ch->ref_count_dec_wq);
891
892 /* More puts than gets. Channel is probably going to get
893 * stuck. */
894 WARN_ON(atomic_read(&ch->ref_count) < 0);
895
896 /* Also, more puts than gets. ref_count can go to 0 only if
897 * the channel is closing. Channel is probably going to get
898 * stuck. */
899 WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
900}
901
902void gk20a_channel_close(struct channel_gk20a *ch)
903{
904 gk20a_free_channel(ch);
748} 905}
749 906
750int gk20a_channel_release(struct inode *inode, struct file *filp) 907int gk20a_channel_release(struct inode *inode, struct file *filp)
@@ -758,14 +915,14 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
758 915
759 trace_gk20a_channel_release(dev_name(&g->dev->dev)); 916 trace_gk20a_channel_release(dev_name(&g->dev->dev));
760 917
761 err = gk20a_busy(ch->g->dev); 918 err = gk20a_busy(g->dev);
762 if (err) { 919 if (err) {
763 gk20a_err(dev_from_gk20a(g), "failed to release channel %d", 920 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
764 ch->hw_chid); 921 ch->hw_chid);
765 return err; 922 return err;
766 } 923 }
767 gk20a_free_channel(ch, true); 924 gk20a_channel_close(ch);
768 gk20a_idle(ch->g->dev); 925 gk20a_idle(g->dev);
769 926
770 filp->private_data = NULL; 927 filp->private_data = NULL;
771 return 0; 928 return 0;
@@ -808,22 +965,31 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
808 struct fifo_gk20a *f = &g->fifo; 965 struct fifo_gk20a *f = &g->fifo;
809 struct channel_gk20a *ch; 966 struct channel_gk20a *ch;
810 967
811 ch = acquire_unused_channel(f); 968 gk20a_dbg_fn("");
969
970 ch = allocate_channel(f);
812 if (ch == NULL) { 971 if (ch == NULL) {
813 /* TBD: we want to make this virtualizable */ 972 /* TBD: we want to make this virtualizable */
814 gk20a_err(dev_from_gk20a(g), "out of hw chids"); 973 gk20a_err(dev_from_gk20a(g), "out of hw chids");
815 return NULL; 974 return NULL;
816 } 975 }
817 976
977 trace_gk20a_open_new_channel(ch->hw_chid);
978
979 BUG_ON(ch->g);
818 ch->g = g; 980 ch->g = g;
819 981
820 if (g->ops.fifo.alloc_inst(g, ch)) { 982 if (g->ops.fifo.alloc_inst(g, ch)) {
821 ch->in_use = false; 983 ch->g = NULL;
984 free_channel(f, ch);
822 gk20a_err(dev_from_gk20a(g), 985 gk20a_err(dev_from_gk20a(g),
823 "failed to open gk20a channel, out of inst mem"); 986 "failed to open gk20a channel, out of inst mem");
824
825 return NULL; 987 return NULL;
826 } 988 }
989
990 /* now the channel is in a limbo out of the free list but not marked as
991 * alive and used (i.e. get-able) yet */
992
827 ch->pid = current->pid; 993 ch->pid = current->pid;
828 994
829 /* By default, channel is regular (non-TSG) channel */ 995 /* By default, channel is regular (non-TSG) channel */
@@ -854,6 +1020,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
854 spin_lock_init(&ch->update_fn_lock); 1020 spin_lock_init(&ch->update_fn_lock);
855 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); 1021 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
856 1022
1023 /* Mark the channel alive, get-able, with 1 initial use
1024 * references. The initial reference will be decreased in
1025 * gk20a_free_channel() */
1026 ch->referenceable = true;
1027 atomic_set(&ch->ref_count, 1);
1028 wmb();
1029
857 return ch; 1030 return ch;
858} 1031}
859 1032
@@ -1379,7 +1552,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1379 struct mapped_buffer_node **mapped_buffers = NULL; 1552 struct mapped_buffer_node **mapped_buffers = NULL;
1380 int err = 0, num_mapped_buffers; 1553 int err = 0, num_mapped_buffers;
1381 1554
1382 /* job needs reference to this vm */ 1555 /* job needs reference to this vm (released in channel_update) */
1383 gk20a_vm_get(vm); 1556 gk20a_vm_get(vm);
1384 1557
1385 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); 1558 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
@@ -1395,14 +1568,21 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1395 return -ENOMEM; 1568 return -ENOMEM;
1396 } 1569 }
1397 1570
1398 job->num_mapped_buffers = num_mapped_buffers; 1571 /* put() is done in gk20a_channel_update() when the job is done */
1399 job->mapped_buffers = mapped_buffers; 1572 c = gk20a_channel_get(c);
1400 job->pre_fence = gk20a_fence_get(pre_fence);
1401 job->post_fence = gk20a_fence_get(post_fence);
1402 1573
1403 mutex_lock(&c->jobs_lock); 1574 if (c) {
1404 list_add_tail(&job->list, &c->jobs); 1575 job->num_mapped_buffers = num_mapped_buffers;
1405 mutex_unlock(&c->jobs_lock); 1576 job->mapped_buffers = mapped_buffers;
1577 job->pre_fence = gk20a_fence_get(pre_fence);
1578 job->post_fence = gk20a_fence_get(post_fence);
1579
1580 mutex_lock(&c->jobs_lock);
1581 list_add_tail(&job->list, &c->jobs);
1582 mutex_unlock(&c->jobs_lock);
1583 } else {
1584 return -ETIMEDOUT;
1585 }
1406 1586
1407 return 0; 1587 return 0;
1408} 1588}
@@ -1412,13 +1592,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1412 struct vm_gk20a *vm = c->vm; 1592 struct vm_gk20a *vm = c->vm;
1413 struct channel_gk20a_job *job, *n; 1593 struct channel_gk20a_job *job, *n;
1414 1594
1415 trace_gk20a_channel_update(c); 1595 trace_gk20a_channel_update(c->hw_chid);
1416 1596
1417 wake_up(&c->submit_wq); 1597 wake_up(&c->submit_wq);
1418 1598
1419 mutex_lock(&c->submit_lock); 1599 mutex_lock(&c->submit_lock);
1420 mutex_lock(&c->jobs_lock); 1600 mutex_lock(&c->jobs_lock);
1421 list_for_each_entry_safe(job, n, &c->jobs, list) { 1601 list_for_each_entry_safe(job, n, &c->jobs, list) {
1602 struct gk20a *g = c->g;
1603
1422 bool completed = gk20a_fence_is_expired(job->post_fence); 1604 bool completed = gk20a_fence_is_expired(job->post_fence);
1423 if (!completed) 1605 if (!completed)
1424 break; 1606 break;
@@ -1434,12 +1616,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1434 gk20a_fence_put(job->pre_fence); 1616 gk20a_fence_put(job->pre_fence);
1435 gk20a_fence_put(job->post_fence); 1617 gk20a_fence_put(job->post_fence);
1436 1618
1437 /* job is done. release its reference to vm */ 1619 /* job is done. release its vm reference (taken in add_job) */
1438 gk20a_vm_put(vm); 1620 gk20a_vm_put(vm);
1621 /* another bookkeeping taken in add_job. caller must hold a ref
1622 * so this wouldn't get freed here. */
1623 gk20a_channel_put(c);
1439 1624
1440 list_del_init(&job->list); 1625 list_del_init(&job->list);
1441 kfree(job); 1626 kfree(job);
1442 gk20a_idle(c->g->dev); 1627 gk20a_idle(g->dev);
1443 } 1628 }
1444 1629
1445 /* 1630 /*
@@ -1719,10 +1904,13 @@ clean_up:
1719int gk20a_init_channel_support(struct gk20a *g, u32 chid) 1904int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1720{ 1905{
1721 struct channel_gk20a *c = g->fifo.channel+chid; 1906 struct channel_gk20a *c = g->fifo.channel+chid;
1722 c->g = g; 1907 c->g = NULL;
1723 c->in_use = false;
1724 c->hw_chid = chid; 1908 c->hw_chid = chid;
1725 c->bound = false; 1909 c->bound = false;
1910 spin_lock_init(&c->ref_obtain_lock);
1911 atomic_set(&c->ref_count, 0);
1912 c->referenceable = false;
1913 init_waitqueue_head(&c->ref_count_dec_wq);
1726 mutex_init(&c->ioctl_lock); 1914 mutex_init(&c->ioctl_lock);
1727 mutex_init(&c->jobs_lock); 1915 mutex_init(&c->jobs_lock);
1728 mutex_init(&c->submit_lock); 1916 mutex_init(&c->submit_lock);
@@ -1733,6 +1921,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1733#endif 1921#endif
1734 INIT_LIST_HEAD(&c->dbg_s_list); 1922 INIT_LIST_HEAD(&c->dbg_s_list);
1735 mutex_init(&c->dbg_s_lock); 1923 mutex_init(&c->dbg_s_lock);
1924 list_add(&c->free_chs, &g->fifo.free_chs);
1736 1925
1737 return 0; 1926 return 0;
1738} 1927}
@@ -2066,8 +2255,7 @@ int gk20a_channel_suspend(struct gk20a *g)
2066 2255
2067 for (chid = 0; chid < f->num_channels; chid++) { 2256 for (chid = 0; chid < f->num_channels; chid++) {
2068 struct channel_gk20a *ch = &f->channel[chid]; 2257 struct channel_gk20a *ch = &f->channel[chid];
2069 if (ch->in_use) { 2258 if (gk20a_channel_get(ch)) {
2070
2071 gk20a_dbg_info("suspend channel %d", chid); 2259 gk20a_dbg_info("suspend channel %d", chid);
2072 /* disable channel */ 2260 /* disable channel */
2073 g->ops.fifo.disable_channel(ch); 2261 g->ops.fifo.disable_channel(ch);
@@ -2079,6 +2267,8 @@ int gk20a_channel_suspend(struct gk20a *g)
2079 flush_work(&ch->update_fn_work); 2267 flush_work(&ch->update_fn_work);
2080 2268
2081 channels_in_use = true; 2269 channels_in_use = true;
2270
2271 gk20a_channel_put(ch);
2082 } 2272 }
2083 } 2273 }
2084 2274
@@ -2086,8 +2276,10 @@ int gk20a_channel_suspend(struct gk20a *g)
2086 g->ops.fifo.update_runlist(g, 0, ~0, false, true); 2276 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2087 2277
2088 for (chid = 0; chid < f->num_channels; chid++) { 2278 for (chid = 0; chid < f->num_channels; chid++) {
2089 if (f->channel[chid].in_use) 2279 if (gk20a_channel_get(&f->channel[chid])) {
2090 g->ops.fifo.unbind_channel(&f->channel[chid]); 2280 g->ops.fifo.unbind_channel(&f->channel[chid]);
2281 gk20a_channel_put(&f->channel[chid]);
2282 }
2091 } 2283 }
2092 } 2284 }
2093 2285
@@ -2095,8 +2287,6 @@ int gk20a_channel_suspend(struct gk20a *g)
2095 return 0; 2287 return 0;
2096} 2288}
2097 2289
2098/* in this context the "channel" is the host1x channel which
2099 * maps to *all* gk20a channels */
2100int gk20a_channel_resume(struct gk20a *g) 2290int gk20a_channel_resume(struct gk20a *g)
2101{ 2291{
2102 struct fifo_gk20a *f = &g->fifo; 2292 struct fifo_gk20a *f = &g->fifo;
@@ -2106,10 +2296,11 @@ int gk20a_channel_resume(struct gk20a *g)
2106 gk20a_dbg_fn(""); 2296 gk20a_dbg_fn("");
2107 2297
2108 for (chid = 0; chid < f->num_channels; chid++) { 2298 for (chid = 0; chid < f->num_channels; chid++) {
2109 if (f->channel[chid].in_use) { 2299 if (gk20a_channel_get(&f->channel[chid])) {
2110 gk20a_dbg_info("resume channel %d", chid); 2300 gk20a_dbg_info("resume channel %d", chid);
2111 g->ops.fifo.bind_channel(&f->channel[chid]); 2301 g->ops.fifo.bind_channel(&f->channel[chid]);
2112 channels_in_use = true; 2302 channels_in_use = true;
2303 gk20a_channel_put(&f->channel[chid]);
2113 } 2304 }
2114 } 2305 }
2115 2306
@@ -2129,10 +2320,11 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2129 2320
2130 for (chid = 0; chid < f->num_channels; chid++) { 2321 for (chid = 0; chid < f->num_channels; chid++) {
2131 struct channel_gk20a *c = g->fifo.channel+chid; 2322 struct channel_gk20a *c = g->fifo.channel+chid;
2132 if (c->in_use) { 2323 if (gk20a_channel_get(c)) {
2133 gk20a_channel_event(c); 2324 gk20a_channel_event(c);
2134 wake_up_interruptible_all(&c->semaphore_wq); 2325 wake_up_interruptible_all(&c->semaphore_wq);
2135 gk20a_channel_update(c, 0); 2326 gk20a_channel_update(c, 0);
2327 gk20a_channel_put(c);
2136 } 2328 }
2137 } 2329 }
2138} 2330}
@@ -2225,10 +2417,18 @@ long gk20a_channel_ioctl(struct file *filp,
2225 return -EFAULT; 2417 return -EFAULT;
2226 } 2418 }
2227 2419
2420 /* take a ref or return timeout if channel refs can't be taken */
2421 ch = gk20a_channel_get(ch);
2422 if (!ch)
2423 return -ETIMEDOUT;
2424
2228 /* protect our sanity for threaded userspace - most of the channel is 2425 /* protect our sanity for threaded userspace - most of the channel is
2229 * not thread safe */ 2426 * not thread safe */
2230 mutex_lock(&ch->ioctl_lock); 2427 mutex_lock(&ch->ioctl_lock);
2231 2428
2429 /* this ioctl call keeps a ref to the file which keeps a ref to the
2430 * channel */
2431
2232 switch (cmd) { 2432 switch (cmd) {
2233 case NVGPU_IOCTL_CHANNEL_OPEN: 2433 case NVGPU_IOCTL_CHANNEL_OPEN:
2234 err = gk20a_channel_open_ioctl(ch->g, 2434 err = gk20a_channel_open_ioctl(ch->g,
@@ -2449,9 +2649,11 @@ long gk20a_channel_ioctl(struct file *filp,
2449 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 2649 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2450 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); 2650 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2451 2651
2452 gk20a_dbg_fn("end");
2453
2454 mutex_unlock(&ch->ioctl_lock); 2652 mutex_unlock(&ch->ioctl_lock);
2455 2653
2654 gk20a_channel_put(ch);
2655
2656 gk20a_dbg_fn("end");
2657
2456 return err; 2658 return err;
2457} 2659}
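
The channel teardown added above waits for deferred interrupts by comparing free-running IRQ counters with cyclic_delta(), so the wait condition keeps working even after the counters wrap around. A small self-contained illustration of that comparison (userspace C, not part of the patch; the cast through unsigned stands in for the kernel's wrapping signed arithmetic):

#include <limits.h>
#include <stdio.h>

/* same idea as the patch's cyclic_delta(): a positive result means
 * "a is ahead of b", even when the counters have wrapped */
static int cyclic_delta(int a, int b)
{
	/* subtract through unsigned so the wraparound is well defined in
	 * plain ISO C; the kernel gets the same effect from its build flags */
	return (int)((unsigned int)a - (unsigned int)b);
}

int main(void)
{
	int threshold = INT_MAX;       /* snapshot of hw_irq_stall_count */
	int handled = INT_MIN + 5;     /* sw_irq_stall_last_handled, wrapped */

	/* delta <= 0 means the handled counter has caught up with the
	 * snapshot, so wait_event() in the patch would stop waiting */
	printf("cyclic_delta = %d\n", cyclic_delta(threshold, handled));
	return 0;
}

Here the handled counter wrapped past INT_MAX, yet the delta comes out as -6, exactly as if no wrap had happened.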
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f022fe36..2ea5b4be 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -19,12 +19,13 @@
19#define CHANNEL_GK20A_H 19#define CHANNEL_GK20A_H
20 20
21#include <linux/log2.h> 21#include <linux/log2.h>
22#include <linux/slab.h>
23#include <linux/wait.h>
24#include <linux/mutex.h> 22#include <linux/mutex.h>
25#include <uapi/linux/nvgpu.h>
26#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/semaphore.h>
25#include <linux/slab.h>
27#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/wait.h>
28#include <uapi/linux/nvgpu.h>
28 29
29struct gk20a; 30struct gk20a;
30struct gr_gk20a; 31struct gr_gk20a;
@@ -77,8 +78,15 @@ struct channel_gk20a_poll_events {
77 78
78/* this is the priv element of struct nvhost_channel */ 79/* this is the priv element of struct nvhost_channel */
79struct channel_gk20a { 80struct channel_gk20a {
80 struct gk20a *g; 81 struct gk20a *g; /* set only when channel is active */
81 bool in_use; 82
83 struct list_head free_chs;
84
85 spinlock_t ref_obtain_lock;
86 bool referenceable;
87 atomic_t ref_count;
88 wait_queue_head_t ref_count_dec_wq;
89
82 int hw_chid; 90 int hw_chid;
83 bool bound; 91 bool bound;
84 bool first_init; 92 bool first_init;
@@ -171,7 +179,10 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
171} 179}
172int channel_gk20a_commit_va(struct channel_gk20a *c); 180int channel_gk20a_commit_va(struct channel_gk20a *c);
173int gk20a_init_channel_support(struct gk20a *, u32 chid); 181int gk20a_init_channel_support(struct gk20a *, u32 chid);
174void gk20a_free_channel(struct channel_gk20a *ch, bool finish); 182
183/* must be inside gk20a_busy()..gk20a_idle() */
184void gk20a_channel_close(struct channel_gk20a *ch);
185
175bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, 186bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
176 u32 timeout_delta_ms); 187 u32 timeout_delta_ms);
177void gk20a_disable_channel(struct channel_gk20a *ch, 188void gk20a_disable_channel(struct channel_gk20a *ch,
@@ -202,6 +213,15 @@ void gk20a_channel_event(struct channel_gk20a *ch);
202 213
203void gk20a_init_channel(struct gpu_ops *gops); 214void gk20a_init_channel(struct gpu_ops *gops);
204 215
216/* returns ch if reference was obtained */
217struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
218 const char *caller);
219#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
220
221
222void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
223#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
224
205int gk20a_wait_channel_idle(struct channel_gk20a *ch); 225int gk20a_wait_channel_idle(struct channel_gk20a *ch);
206struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g); 226struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g);
207struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, 227struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
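
To make the reference-counting contract declared above concrete, here is a minimal sketch (not part of the patch) of how a caller that finds a channel by hw_chid is expected to use the new macros: gk20a_channel_get() returns NULL once the channel is dead or being freed, and every successful get must be paired with a put. do_something_with() is a hypothetical helper.

#include "gk20a.h"
#include "channel_gk20a.h"

static int use_channel_by_chid(struct gk20a *g, u32 hw_chid)
{
	struct channel_gk20a *ch = &g->fifo.channel[hw_chid];

	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;	/* channel is dead or being freed */

	do_something_with(ch);		/* safe: free is blocked by our ref */

	gk20a_channel_put(ch);		/* matches the get above */
	return 0;
}

This is the same shape the ioctl path above takes: get before touching the channel, -ETIMEDOUT if the get fails, put after the last access.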
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 8cc852c7..7a707fbd 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
154 154
155static void gk20a_channel_syncpt_update(void *priv, int nr_completed) 155static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
156{ 156{
157 struct channel_gk20a *ch20a = priv; 157 struct channel_gk20a *ch = priv;
158 gk20a_channel_update(ch20a, nr_completed); 158 struct gk20a *g = ch->g;
159
160 /* need busy for possible channel deletion */
161 if (gk20a_busy(ch->g->dev)) {
162 gk20a_err(dev_from_gk20a(ch->g),
163 "failed to busy while syncpt update");
164 /* Last gk20a_idle()s are in channel_update, so we shouldn't
165 * get here. If we do, the channel is badly broken now */
166 return;
167 }
168
169 /* note: channel_get() is in __gk20a_channel_syncpt_incr() */
170 gk20a_channel_update(ch, nr_completed);
171 gk20a_channel_put(ch);
172
173 gk20a_idle(g->dev);
159} 174}
160 175
161static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 176static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
@@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
209 thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); 224 thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
210 225
211 if (register_irq) { 226 if (register_irq) {
212 err = nvhost_intr_register_notifier(sp->host1x_pdev, 227 err = gk20a_busy(c->g->dev);
213 sp->id, thresh, 228 if (err)
214 gk20a_channel_syncpt_update, c); 229 gk20a_err(dev_from_gk20a(c->g),
215 230 "failed to add syncpt interrupt notifier for channel %d",
216 /* Adding interrupt action should never fail. A proper error 231 c->hw_chid);
217 * handling here would require us to decrement the syncpt max 232 else {
218 * back to its original value. */ 233 struct channel_gk20a *referenced = gk20a_channel_get(c);
219 WARN(err, "failed to set submit complete interrupt"); 234
235 WARN_ON(!referenced);
236 gk20a_idle(c->g->dev);
237
238 if (referenced) {
239 /* note: channel_put() is in
240 * gk20a_channel_syncpt_update() */
241
242 err = nvhost_intr_register_notifier(
243 sp->host1x_pdev,
244 sp->id, thresh,
245 gk20a_channel_syncpt_update, c);
246 if (err)
247 gk20a_channel_put(referenced);
248
249 /* Adding interrupt action should
250 * never fail. A proper error handling
251 * here would require us to decrement
252 * the syncpt max back to its original
253 * value. */
254 WARN(err,
255 "failed to set submit complete interrupt");
256 }
257 }
220 } 258 }
221 259
222 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, 260 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
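
The syncpt changes above follow a common handoff pattern: a channel reference is taken before the asynchronous notifier is registered, released immediately if registration fails, and otherwise released by the callback itself once the work is done. A condensed sketch of that flow (illustrative only, omitting the gk20a_busy()/gk20a_idle() bracketing; register_async_notifier() stands in for the nvhost call used in the patch):

/* the callback owns the reference taken at registration time */
static void completion_cb(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch, nr_completed);
	gk20a_channel_put(ch);		/* drop the ref taken below */
}

static int arm_completion(struct channel_gk20a *c, u32 thresh)
{
	int err;

	if (!gk20a_channel_get(c))	/* keep the channel alive until cb runs */
		return -ETIMEDOUT;

	err = register_async_notifier(thresh, completion_cb, c);  /* hypothetical */
	if (err)
		gk20a_channel_put(c);	/* cb will never run: undo the get */

	return err;
}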
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 0f1c31dd..bda0dab0 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -36,6 +36,7 @@ static struct platform_device *gk20a_device;
36 36
37struct ch_state { 37struct ch_state {
38 int pid; 38 int pid;
39 int refs;
39 u8 inst_block[0]; 40 u8 inst_block[0];
40}; 41};
41 42
@@ -118,9 +119,10 @@ static void gk20a_debug_show_channel(struct gk20a *g,
118 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); 119 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
119 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); 120 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
120 121
121 gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid, 122 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
122 g->dev->name, 123 g->dev->name,
123 ch_state->pid); 124 ch_state->pid,
125 ch_state->refs);
124 gk20a_debug_output(o, "%s in use %s %s\n", 126 gk20a_debug_output(o, "%s in use %s %s\n",
125 ccsr_channel_enable_v(channel) ? "" : "not", 127 ccsr_channel_enable_v(channel) ? "" : "not",
126 ccsr_chan_status_str[status], 128 ccsr_chan_status_str[status],
@@ -231,16 +233,30 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
231 } 233 }
232 234
233 for (chid = 0; chid < f->num_channels; chid++) { 235 for (chid = 0; chid < f->num_channels; chid++) {
234 if (f->channel[chid].in_use) 236 struct channel_gk20a *ch = &f->channel[chid];
235 ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL); 237 if (gk20a_channel_get(ch)) {
238 ch_state[chid] =
239 kmalloc(sizeof(struct ch_state) +
240 ram_in_alloc_size_v(), GFP_KERNEL);
241 /* ref taken stays to below loop with
242 * successful allocs */
243 if (!ch_state[chid])
244 gk20a_channel_put(ch);
245 }
236 } 246 }
237 247
238 for (chid = 0; chid < f->num_channels; chid++) { 248 for (chid = 0; chid < f->num_channels; chid++) {
239 if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) { 249 struct channel_gk20a *ch = &f->channel[chid];
240 ch_state[chid]->pid = f->channel[chid].pid; 250 if (ch_state[chid]) {
241 memcpy(&ch_state[chid]->inst_block[0], 251 if (ch->inst_block.cpu_va) {
242 f->channel[chid].inst_block.cpu_va, 252 ch_state[chid]->pid = ch->pid;
243 ram_in_alloc_size_v()); 253 ch_state[chid]->refs =
254 atomic_read(&ch->ref_count);
255 memcpy(&ch_state[chid]->inst_block[0],
256 ch->inst_block.cpu_va,
257 ram_in_alloc_size_v());
258 }
259 gk20a_channel_put(ch);
244 } 260 }
245 } 261 }
246 for (chid = 0; chid < f->num_channels; chid++) { 262 for (chid = 0; chid < f->num_channels; chid++) {
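
In the debug dump above, a successfully allocated ch_state entry is what carries the channel reference from the first loop into the second; on allocation failure the reference is dropped right away. A compact sketch of that convention (illustrative, with a hypothetical snapshot type standing in for ch_state):

struct ch_snapshot {			/* hypothetical, mirrors ch_state */
	int pid;
	int refs;
};

static void snapshot_live_channels(struct fifo_gk20a *f,
				   struct ch_snapshot **snap)
{
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct channel_gk20a *ch = &f->channel[chid];

		if (!gk20a_channel_get(ch))
			continue;		/* dead channel, nothing to dump */

		snap[chid] = kmalloc(sizeof(*snap[chid]), GFP_KERNEL);
		if (!snap[chid])
			gk20a_channel_put(ch);	/* no slot, drop the ref now */
		/* else the ref stays held until the slot is consumed later */
	}
}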
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 56b954a9..4ef310b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -515,6 +515,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
515 515
516 init_runlist(g, f); 516 init_runlist(g, f);
517 517
518 INIT_LIST_HEAD(&f->free_chs);
519 mutex_init(&f->free_chs_mutex);
520
518 for (chid = 0; chid < f->num_channels; chid++) { 521 for (chid = 0; chid < f->num_channels; chid++) {
519 f->channel[chid].userd_cpu_va = 522 f->channel[chid].userd_cpu_va =
520 f->userd.cpu_va + chid * f->userd_entry_size; 523 f->userd.cpu_va + chid * f->userd_entry_size;
@@ -527,7 +530,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
527 gk20a_init_channel_support(g, chid); 530 gk20a_init_channel_support(g, chid);
528 gk20a_init_tsg_support(g, chid); 531 gk20a_init_tsg_support(g, chid);
529 } 532 }
530 mutex_init(&f->ch_inuse_mutex);
531 mutex_init(&f->tsg_inuse_mutex); 533 mutex_init(&f->tsg_inuse_mutex);
532 534
533 f->remove_support = gk20a_remove_fifo_support; 535 f->remove_support = gk20a_remove_fifo_support;
@@ -637,6 +639,7 @@ int gk20a_init_fifo_support(struct gk20a *g)
637 return err; 639 return err;
638} 640}
639 641
642/* return with a reference to the channel, caller must put it back */
640static struct channel_gk20a * 643static struct channel_gk20a *
641channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) 644channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
642{ 645{
@@ -644,10 +647,16 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
644 if (unlikely(!f->channel)) 647 if (unlikely(!f->channel))
645 return NULL; 648 return NULL;
646 for (ci = 0; ci < f->num_channels; ci++) { 649 for (ci = 0; ci < f->num_channels; ci++) {
647 struct channel_gk20a *c = f->channel+ci; 650 struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]);
648 if (c->inst_block.cpu_va && 651 /* only alive channels are searched */
649 (inst_ptr == gk20a_mem_phys(&c->inst_block))) 652 if (!ch)
650 return f->channel+ci; 653 continue;
654
655 if (ch->inst_block.cpu_va &&
656 (inst_ptr == gk20a_mem_phys(&ch->inst_block)))
657 return ch;
658
659 gk20a_channel_put(ch);
651 } 660 }
652 return NULL; 661 return NULL;
653} 662}
@@ -803,6 +812,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
803 return true; 812 return true;
804} 813}
805 814
815/* caller must hold a channel reference */
806static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, 816static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
807 struct channel_gk20a *ch) 817 struct channel_gk20a *ch)
808{ 818{
@@ -854,14 +864,38 @@ static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
854 "TSG %d generated a mmu fault", tsg->tsgid); 864 "TSG %d generated a mmu fault", tsg->tsgid);
855 865
856 mutex_lock(&tsg->ch_list_lock); 866 mutex_lock(&tsg->ch_list_lock);
857 list_for_each_entry(ch, &tsg->ch_list, ch_entry) 867 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
858 ret = gk20a_fifo_set_ctx_mmu_error(g, ch); 868 if (gk20a_channel_get(ch)) {
869 if (!gk20a_fifo_set_ctx_mmu_error(g, ch))
870 ret = false;
871 gk20a_channel_put(ch);
872 }
873 }
859 mutex_unlock(&tsg->ch_list_lock); 874 mutex_unlock(&tsg->ch_list_lock);
860 875
861 return ret; 876 return ret;
862} 877}
863 878
864static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) 879static void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid)
880{
881 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
882 struct channel_gk20a *ch;
883
884 mutex_lock(&tsg->ch_list_lock);
885 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
886 if (gk20a_channel_get(ch)) {
887 gk20a_channel_abort(ch);
888 gk20a_channel_put(ch);
889 }
890 }
891 mutex_unlock(&tsg->ch_list_lock);
892}
893
894static bool gk20a_fifo_handle_mmu_fault(
895 struct gk20a *g,
896 u32 mmu_fault_engines, /* queried from HW if 0 */
897 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
898 bool id_is_tsg)
865{ 899{
866 bool fake_fault; 900 bool fake_fault;
867 unsigned long fault_id; 901 unsigned long fault_id;
@@ -894,10 +928,8 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
894 grfifo_ctl | gr_gpfifo_ctl_access_f(0) | 928 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
895 gr_gpfifo_ctl_semaphore_access_f(0)); 929 gr_gpfifo_ctl_semaphore_access_f(0));
896 930
897 /* If we have recovery in progress, MMU fault id is invalid */ 931 if (mmu_fault_engines) {
898 if (g->fifo.mmu_fault_engines) { 932 fault_id = mmu_fault_engines;
899 fault_id = g->fifo.mmu_fault_engines;
900 g->fifo.mmu_fault_engines = 0;
901 fake_fault = true; 933 fake_fault = true;
902 } else { 934 } else {
903 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); 935 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
@@ -914,6 +946,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
914 struct fifo_mmu_fault_info_gk20a f; 946 struct fifo_mmu_fault_info_gk20a f;
915 struct channel_gk20a *ch = NULL; 947 struct channel_gk20a *ch = NULL;
916 struct tsg_gk20a *tsg = NULL; 948 struct tsg_gk20a *tsg = NULL;
949 struct channel_gk20a *referenced_channel = 0;
917 /* read and parse engine status */ 950 /* read and parse engine status */
918 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); 951 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
919 u32 ctx_status = fifo_engine_status_ctx_status_v(status); 952 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
@@ -953,22 +986,34 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
953 /* get the channel/TSG */ 986 /* get the channel/TSG */
954 if (fake_fault) { 987 if (fake_fault) {
955 /* use next_id if context load is failing */ 988 /* use next_id if context load is failing */
956 u32 id = (ctx_status == 989 u32 id, type;
957 fifo_engine_status_ctx_status_ctxsw_load_v()) ? 990
958 fifo_engine_status_next_id_v(status) : 991 if (hw_id == ~(u32)0) {
959 fifo_engine_status_id_v(status); 992 id = (ctx_status ==
960 u32 type = (ctx_status == 993 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
961 fifo_engine_status_ctx_status_ctxsw_load_v()) ? 994 fifo_engine_status_next_id_v(status) :
962 fifo_engine_status_next_id_type_v(status) : 995 fifo_engine_status_id_v(status);
963 fifo_engine_status_id_type_v(status); 996 type = (ctx_status ==
997 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
998 fifo_engine_status_next_id_type_v(status) :
999 fifo_engine_status_id_type_v(status);
1000 } else {
1001 id = hw_id;
1002 type = id_is_tsg ?
1003 fifo_engine_status_id_type_tsgid_v() :
1004 fifo_engine_status_id_type_chid_v();
1005 }
964 1006
965 if (type == fifo_engine_status_id_type_tsgid_v()) 1007 if (type == fifo_engine_status_id_type_tsgid_v())
966 tsg = &g->fifo.tsg[id]; 1008 tsg = &g->fifo.tsg[id];
967 else if (type == fifo_engine_status_id_type_chid_v()) 1009 else if (type == fifo_engine_status_id_type_chid_v()) {
968 ch = &g->fifo.channel[id]; 1010 ch = &g->fifo.channel[id];
1011 referenced_channel = gk20a_channel_get(ch);
1012 }
969 } else { 1013 } else {
970 /* read channel based on instruction pointer */ 1014 /* read channel based on instruction pointer */
971 ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); 1015 ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
1016 referenced_channel = ch;
972 } 1017 }
973 1018
974 if (ch && gk20a_is_channel_marked_as_tsg(ch)) 1019 if (ch && gk20a_is_channel_marked_as_tsg(ch))
@@ -977,7 +1022,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
977 /* check if engine reset should be deferred */ 1022 /* check if engine reset should be deferred */
978 if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g, 1023 if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g,
979 engine_id, &f, fake_fault)) { 1024 engine_id, &f, fake_fault)) {
980 g->fifo.mmu_fault_engines = fault_id; 1025 g->fifo.deferred_fault_engines = fault_id;
981 1026
982 /* handled during channel free */ 1027 /* handled during channel free */
983 g->fifo.deferred_reset_pending = true; 1028 g->fifo.deferred_reset_pending = true;
@@ -988,19 +1033,31 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
988 * syncpoints */ 1033 * syncpoints */
989 1034
990 if (tsg) { 1035 if (tsg) {
991 struct channel_gk20a *ch = NULL;
992 if (!g->fifo.deferred_reset_pending) 1036 if (!g->fifo.deferred_reset_pending)
993 verbose = 1037 verbose =
994 gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); 1038 gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
995 mutex_lock(&tsg->ch_list_lock); 1039
996 list_for_each_entry(ch, &tsg->ch_list, ch_entry) 1040 gk20a_fifo_abort_tsg(g, ch->tsgid);
997 gk20a_channel_abort(ch); 1041
998 mutex_unlock(&tsg->ch_list_lock); 1042 /* put back the ref taken early above */
1043 if (referenced_channel) {
1044 gk20a_channel_put(ch);
1045 } else {
1046 gk20a_err(dev_from_gk20a(g),
1047 "mmu error in freed tsg channel %d on tsgid %d",
1048 ch->hw_chid, ch->tsgid);
1049 }
999 } else if (ch) { 1050 } else if (ch) {
1000 if (!g->fifo.deferred_reset_pending) 1051 if (referenced_channel) {
1001 verbose = 1052 if (!g->fifo.deferred_reset_pending)
1002 gk20a_fifo_set_ctx_mmu_error_ch(g, ch); 1053 verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1003 gk20a_channel_abort(ch); 1054 gk20a_channel_abort(ch);
1055 gk20a_channel_put(ch);
1056 } else {
1057 gk20a_err(dev_from_gk20a(g),
1058 "mmu error in freed channel %d",
1059 ch->hw_chid);
1060 }
1004 } else if (f.inst_ptr == 1061 } else if (f.inst_ptr ==
1005 gk20a_mem_phys(&g->mm.bar1.inst_block)) { 1062 gk20a_mem_phys(&g->mm.bar1.inst_block)) {
1006 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); 1063 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
@@ -1133,46 +1190,69 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1133 1190
1134void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) 1191void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1135{ 1192{
1136 u32 engines = gk20a_fifo_engines_on_id(g, hw_chid, false); 1193 u32 engines;
1194
1195 /* stop context switching to prevent engine assignments from
1196 changing until channel is recovered */
1197 mutex_lock(&g->dbg_sessions_lock);
1198 gr_gk20a_disable_ctxsw(g);
1199
1200 engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
1201
1137 if (engines) 1202 if (engines)
1138 gk20a_fifo_recover(g, engines, verbose); 1203 gk20a_fifo_recover(g, engines, hw_chid, false, verbose);
1139 else { 1204 else {
1140 struct channel_gk20a *ch = 1205 struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
1141 g->fifo.channel + hw_chid;
1142 1206
1143 gk20a_channel_abort(ch); 1207 if (gk20a_channel_get(ch)) {
1208 gk20a_channel_abort(ch);
1144 1209
1145 if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) 1210 if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
1146 gk20a_debug_dump(g->dev); 1211 gk20a_debug_dump(g->dev);
1212
1213 gk20a_channel_put(ch);
1214 }
1147 } 1215 }
1216
1217 gr_gk20a_enable_ctxsw(g);
1218 mutex_unlock(&g->dbg_sessions_lock);
1148} 1219}
1149 1220
1150void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) 1221void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1151{ 1222{
1152 u32 engines = gk20a_fifo_engines_on_id(g, tsgid, true); 1223 u32 engines;
1224
1225 /* stop context switching to prevent engine assignments from
1226 changing until TSG is recovered */
1227 mutex_lock(&g->dbg_sessions_lock);
1228 gr_gk20a_disable_ctxsw(g);
1229
1230 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
1231
1153 if (engines) 1232 if (engines)
1154 gk20a_fifo_recover(g, engines, verbose); 1233 gk20a_fifo_recover(g, engines, tsgid, true, verbose);
1155 else { 1234 else {
1156 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; 1235 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
1157 struct channel_gk20a *ch;
1158 1236
1159 if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg)) 1237 if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
1160 gk20a_debug_dump(g->dev); 1238 gk20a_debug_dump(g->dev);
1161 1239
1162 mutex_lock(&tsg->ch_list_lock); 1240 gk20a_fifo_abort_tsg(g, tsgid);
1163 list_for_each_entry(ch, &tsg->ch_list, ch_entry)
1164 gk20a_channel_abort(ch);
1165 mutex_unlock(&tsg->ch_list_lock);
1166 } 1241 }
1242
1243 gr_gk20a_enable_ctxsw(g);
1244 mutex_unlock(&g->dbg_sessions_lock);
1167} 1245}
1168 1246
1169void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, 1247void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1170 bool verbose) 1248 u32 hw_id, bool id_is_tsg,
1249 bool verbose)
1171{ 1250{
1172 unsigned long engine_id, i; 1251 unsigned long engine_id, i;
1173 unsigned long _engine_ids = __engine_ids; 1252 unsigned long _engine_ids = __engine_ids;
1174 unsigned long engine_ids = 0; 1253 unsigned long engine_ids = 0;
1175 u32 val; 1254 u32 val;
1255 u32 mmu_fault_engines = 0;
1176 1256
1177 if (verbose) 1257 if (verbose)
1178 gk20a_debug_dump(g->dev); 1258 gk20a_debug_dump(g->dev);
@@ -1181,7 +1261,6 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1181 g->ops.ltc.flush(g); 1261 g->ops.ltc.flush(g);
1182 1262
1183 /* store faulted engines in advance */ 1263 /* store faulted engines in advance */
1184 g->fifo.mmu_fault_engines = 0;
1185 for_each_set_bit(engine_id, &_engine_ids, 32) { 1264 for_each_set_bit(engine_id, &_engine_ids, 32) {
1186 u32 ref_type; 1265 u32 ref_type;
1187 u32 ref_id; 1266 u32 ref_id;
@@ -1196,11 +1275,10 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1196 gk20a_fifo_get_faulty_id_type(g, i, &id, &type); 1275 gk20a_fifo_get_faulty_id_type(g, i, &id, &type);
1197 if (ref_type == type && ref_id == id) { 1276 if (ref_type == type && ref_id == id) {
1198 engine_ids |= BIT(i); 1277 engine_ids |= BIT(i);
1199 g->fifo.mmu_fault_engines |= 1278 mmu_fault_engines |=
1200 BIT(gk20a_engine_id_to_mmu_id(i)); 1279 BIT(gk20a_engine_id_to_mmu_id(i));
1201 } 1280 }
1202 } 1281 }
1203
1204 } 1282 }
1205 1283
1206 /* 1284 /*
@@ -1214,7 +1292,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1214 fifo_intr_0_sched_error_reset_f()); 1292 fifo_intr_0_sched_error_reset_f());
1215 1293
1216 g->ops.fifo.trigger_mmu_fault(g, engine_ids); 1294 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
1217 gk20a_fifo_handle_mmu_fault(g); 1295 gk20a_fifo_handle_mmu_fault(g, engine_ids, hw_id, id_is_tsg);
1218 1296
1219 val = gk20a_readl(g, fifo_intr_en_0_r()); 1297 val = gk20a_readl(g, fifo_intr_en_0_r());
1220 val |= fifo_intr_en_0_mmu_fault_f(1) 1298 val |= fifo_intr_en_0_mmu_fault_f(1)
@@ -1222,25 +1300,32 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1222 gk20a_writel(g, fifo_intr_en_0_r(), val); 1300 gk20a_writel(g, fifo_intr_en_0_r(), val);
1223} 1301}
1224 1302
1303/* force reset channel and tsg (if it's part of one) */
1225int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) 1304int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose)
1226{ 1305{
1227 struct tsg_gk20a *tsg = NULL; 1306 struct tsg_gk20a *tsg = NULL;
1228 struct channel_gk20a *ch_tsg = NULL; 1307 struct channel_gk20a *ch_tsg = NULL;
1308 struct gk20a *g = ch->g;
1229 1309
1230 if (gk20a_is_channel_marked_as_tsg(ch)) { 1310 if (gk20a_is_channel_marked_as_tsg(ch)) {
1231 tsg = &ch->g->fifo.tsg[ch->hw_chid]; 1311 tsg = &g->fifo.tsg[ch->hw_chid];
1232 1312
1233 mutex_lock(&tsg->ch_list_lock); 1313 mutex_lock(&tsg->ch_list_lock);
1314
1234 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { 1315 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
1235 gk20a_set_error_notifier(ch_tsg, 1316 if (gk20a_channel_get(ch_tsg)) {
1236 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); 1317 gk20a_set_error_notifier(ch_tsg,
1318 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR);
1319 gk20a_channel_put(ch_tsg);
1320 }
1237 } 1321 }
1322
1238 mutex_unlock(&tsg->ch_list_lock); 1323 mutex_unlock(&tsg->ch_list_lock);
1239 gk20a_fifo_recover_tsg(ch->g, ch->tsgid, verbose); 1324 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
1240 } else { 1325 } else {
1241 gk20a_set_error_notifier(ch, 1326 gk20a_set_error_notifier(ch,
1242 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); 1327 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR);
1243 gk20a_fifo_recover_ch(ch->g, ch->hw_chid, verbose); 1328 gk20a_fifo_recover_ch(g, ch->hw_chid, verbose);
1244 } 1329 }
1245 1330
1246 return 0; 1331 return 0;
@@ -1300,11 +1385,14 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1300 struct channel_gk20a *ch = &f->channel[id]; 1385 struct channel_gk20a *ch = &f->channel[id];
1301 1386
1302 if (non_chid) { 1387 if (non_chid) {
1303 gk20a_fifo_recover(g, BIT(engine_id), true); 1388 gk20a_fifo_recover(g, BIT(engine_id), id, true, true);
1304 ret = true; 1389 ret = true;
1305 goto err; 1390 goto err;
1306 } 1391 }
1307 1392
1393 if (!gk20a_channel_get(ch))
1394 goto err;
1395
1308 if (gk20a_channel_update_and_check_timeout(ch, 1396 if (gk20a_channel_update_and_check_timeout(ch,
1309 GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { 1397 GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
1310 gk20a_set_error_notifier(ch, 1398 gk20a_set_error_notifier(ch,
@@ -1313,7 +1401,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1313 "fifo sched ctxsw timeout error:" 1401 "fifo sched ctxsw timeout error:"
1314 "engine = %u, ch = %d", engine_id, id); 1402 "engine = %u, ch = %d", engine_id, id);
1315 gk20a_gr_debug_dump(g->dev); 1403 gk20a_gr_debug_dump(g->dev);
1316 gk20a_fifo_recover(g, BIT(engine_id), 1404 gk20a_fifo_recover(g, BIT(engine_id), id, false,
1317 ch->timeout_debug_dump); 1405 ch->timeout_debug_dump);
1318 ret = true; 1406 ret = true;
1319 } else { 1407 } else {
@@ -1324,6 +1412,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1324 id); 1412 id);
1325 ret = false; 1413 ret = false;
1326 } 1414 }
1415 gk20a_channel_put(ch);
1327 return ret; 1416 return ret;
1328 } 1417 }
1329 1418
@@ -1336,7 +1425,7 @@ err:
1336 1425
1337static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) 1426static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
1338{ 1427{
1339 bool print_channel_reset_log = false, reset_engine = false; 1428 bool print_channel_reset_log = false;
1340 struct device *dev = dev_from_gk20a(g); 1429 struct device *dev = dev_from_gk20a(g);
1341 u32 handled = 0; 1430 u32 handled = 0;
1342 1431
@@ -1367,8 +1456,8 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
1367 } 1456 }
1368 1457
1369 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { 1458 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
1370 print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g); 1459 print_channel_reset_log =
1371 reset_engine = true; 1460 gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false);
1372 handled |= fifo_intr_0_mmu_fault_pending_f(); 1461 handled |= fifo_intr_0_mmu_fault_pending_f();
1373 } 1462 }
1374 1463
@@ -1452,9 +1541,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
1452 == fifo_pbdma_status_id_type_chid_v()) { 1541 == fifo_pbdma_status_id_type_chid_v()) {
1453 struct channel_gk20a *ch = &f->channel[id]; 1542 struct channel_gk20a *ch = &f->channel[id];
1454 1543
1455 gk20a_set_error_notifier(ch, 1544 if (gk20a_channel_get(ch)) {
1456 NVGPU_CHANNEL_PBDMA_ERROR); 1545 gk20a_set_error_notifier(ch,
1457 gk20a_fifo_recover_ch(g, id, true); 1546 NVGPU_CHANNEL_PBDMA_ERROR);
1547 gk20a_fifo_recover_ch(g, id, true);
1548 gk20a_channel_put(ch);
1549 }
1458 } else if (fifo_pbdma_status_id_type_v(status) 1550 } else if (fifo_pbdma_status_id_type_v(status)
1459 == fifo_pbdma_status_id_type_tsgid_v()) { 1551 == fifo_pbdma_status_id_type_tsgid_v()) {
1460 struct tsg_gk20a *tsg = &f->tsg[id]; 1552 struct tsg_gk20a *tsg = &f->tsg[id];
@@ -1462,8 +1554,11 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
1462 1554
1463 mutex_lock(&tsg->ch_list_lock); 1555 mutex_lock(&tsg->ch_list_lock);
1464 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 1556 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
1465 gk20a_set_error_notifier(ch, 1557 if (gk20a_channel_get(ch)) {
1466 NVGPU_CHANNEL_PBDMA_ERROR); 1558 gk20a_set_error_notifier(ch,
1559 NVGPU_CHANNEL_PBDMA_ERROR);
1560 gk20a_channel_put(ch);
1561 }
1467 } 1562 }
1468 mutex_unlock(&tsg->ch_list_lock); 1563 mutex_unlock(&tsg->ch_list_lock);
1469 gk20a_fifo_recover_tsg(g, id, true); 1564 gk20a_fifo_recover_tsg(g, id, true);
@@ -1559,6 +1654,8 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1559 + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); 1654 + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1560 u32 ret = 0; 1655 u32 ret = 0;
1561 1656
1657 gk20a_dbg_fn("%d", id);
1658
1562 /* issue preempt */ 1659 /* issue preempt */
1563 if (is_tsg) 1660 if (is_tsg)
1564 gk20a_writel(g, fifo_preempt_r(), 1661 gk20a_writel(g, fifo_preempt_r(),
@@ -1569,6 +1666,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1569 fifo_preempt_chid_f(id) | 1666 fifo_preempt_chid_f(id) |
1570 fifo_preempt_type_channel_f()); 1667 fifo_preempt_type_channel_f());
1571 1668
1669 gk20a_dbg_fn("%d", id);
1572 /* wait for preempt */ 1670 /* wait for preempt */
1573 ret = -EBUSY; 1671 ret = -EBUSY;
1574 do { 1672 do {
@@ -1583,6 +1681,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1583 } while (time_before(jiffies, end_jiffies) || 1681 } while (time_before(jiffies, end_jiffies) ||
1584 !tegra_platform_is_silicon()); 1682 !tegra_platform_is_silicon());
1585 1683
1684 gk20a_dbg_fn("%d", id);
1586 if (ret) { 1685 if (ret) {
1587 if (is_tsg) { 1686 if (is_tsg) {
1588 struct tsg_gk20a *tsg = &g->fifo.tsg[id]; 1687 struct tsg_gk20a *tsg = &g->fifo.tsg[id];
@@ -1593,8 +1692,11 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1593 1692
1594 mutex_lock(&tsg->ch_list_lock); 1693 mutex_lock(&tsg->ch_list_lock);
1595 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 1694 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
1695 if (!gk20a_channel_get(ch))
1696 continue;
1596 gk20a_set_error_notifier(ch, 1697 gk20a_set_error_notifier(ch,
1597 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 1698 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1699 gk20a_channel_put(ch);
1598 } 1700 }
1599 mutex_unlock(&tsg->ch_list_lock); 1701 mutex_unlock(&tsg->ch_list_lock);
1600 gk20a_fifo_recover_tsg(g, id, true); 1702 gk20a_fifo_recover_tsg(g, id, true);
@@ -1604,9 +1706,12 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1604 gk20a_err(dev_from_gk20a(g), 1706 gk20a_err(dev_from_gk20a(g),
1605 "preempt channel %d timeout\n", id); 1707 "preempt channel %d timeout\n", id);
1606 1708
1607 gk20a_set_error_notifier(ch, 1709 if (gk20a_channel_get(ch)) {
1608 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 1710 gk20a_set_error_notifier(ch,
1609 gk20a_fifo_recover_ch(g, id, true); 1711 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1712 gk20a_fifo_recover_ch(g, id, true);
1713 gk20a_channel_put(ch);
1714 }
1610 } 1715 }
1611 } 1716 }
1612 1717
@@ -1790,7 +1895,9 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
1790 (f->engine_info[i].runlist_id == runlist_id)) 1895 (f->engine_info[i].runlist_id == runlist_id))
1791 engines |= BIT(i); 1896 engines |= BIT(i);
1792 } 1897 }
1793 gk20a_fifo_recover(g, engines, true); 1898
1899 if (engines)
1900 gk20a_fifo_recover(g, engines, ~(u32)0, false, true);
1794} 1901}
1795 1902
1796static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) 1903static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
@@ -1994,6 +2101,8 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
1994 u32 mutex_ret; 2101 u32 mutex_ret;
1995 u32 ret = 0; 2102 u32 ret = 0;
1996 2103
2104 gk20a_dbg_fn("");
2105
1997 runlist = &f->runlist_info[runlist_id]; 2106 runlist = &f->runlist_info[runlist_id];
1998 2107
1999 mutex_lock(&runlist->mutex); 2108 mutex_lock(&runlist->mutex);
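The preempt/recovery hunks above all adopt the same guard: take a reference with gk20a_channel_get() before touching a channel, skip it if the reference cannot be taken, and drop it with gk20a_channel_put() afterwards. A condensed, illustrative sketch of that pattern follows; notify_tsg_channels() is a hypothetical helper name, not code from this change, while the get/put and error-notifier calls are the ones shown in the diff.

/* Illustrative sketch: set an error notifier on every channel of a TSG,
 * but only on channels that can still be referenced. Channels whose
 * refcount has already dropped to zero (mid-teardown) are skipped
 * rather than touched, which is the point of the new
 * gk20a_channel_get()/gk20a_channel_put() pairs above. */
static void notify_tsg_channels(struct tsg_gk20a *tsg, u32 error)
{
	struct channel_gk20a *ch;

	mutex_lock(&tsg->ch_list_lock);
	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
		if (!gk20a_channel_get(ch))
			continue;	/* channel is being freed; leave it alone */
		gk20a_set_error_notifier(ch, error);
		gk20a_channel_put(ch);
	}
	mutex_unlock(&tsg->ch_list_lock);
}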
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index dd320ae1..fdf843d2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A graphics fifo (gr host) 4 * GK20A graphics fifo (gr host)
5 * 5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -106,7 +106,9 @@ struct fifo_gk20a {
106 u32 userd_entry_size; 106 u32 userd_entry_size;
107 107
108 struct channel_gk20a *channel; 108 struct channel_gk20a *channel;
109 struct mutex ch_inuse_mutex; /* protect unused chid look up */ 109 /* zero-kref'd channels here */
110 struct list_head free_chs;
111 struct mutex free_chs_mutex;
110 112
111 struct tsg_gk20a *tsg; 113 struct tsg_gk20a *tsg;
112 struct mutex tsg_inuse_mutex; 114 struct mutex tsg_inuse_mutex;
@@ -130,7 +132,7 @@ struct fifo_gk20a {
130 132
131 } intr; 133 } intr;
132 134
133 u32 mmu_fault_engines; 135 u32 deferred_fault_engines;
134 bool deferred_reset_pending; 136 bool deferred_reset_pending;
135 struct mutex deferred_reset_mutex; 137 struct mutex deferred_reset_mutex;
136}; 138};
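The new free_chs list and free_chs_mutex replace the old ch_inuse_mutex-guarded scan for an unused channel id. Based on the "zero-kref'd channels here" comment, a channel whose reference count reaches zero is presumably parked on this list roughly as sketched below; the helper name is hypothetical, and the sketch assumes struct channel_gk20a carries a matching free_chs list node.

/* Hypothetical sketch only: return a fully released channel to the
 * fifo's free list so allocation becomes a cheap list operation
 * instead of a scan over f->channel[]. */
static void park_free_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
{
	mutex_lock(&f->free_chs_mutex);
	list_add(&c->free_chs, &f->free_chs);	/* c->free_chs: per-channel list node */
	mutex_unlock(&f->free_chs_mutex);
}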
@@ -157,7 +159,12 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
157int gk20a_fifo_suspend(struct gk20a *g); 159int gk20a_fifo_suspend(struct gk20a *g);
158 160
159bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); 161bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
160void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); 162
163void gk20a_fifo_recover(struct gk20a *g,
164 u32 engine_ids, /* if zero, will be queried from HW */
165 u32 hw_id, /* if ~0, will be queried from HW */
166 bool hw_id_is_tsg, /* ignored if hw_id == ~0 */
167 bool verbose);
161void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); 168void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose);
162void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); 169void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose);
163int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); 170int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose);
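The widened gk20a_fifo_recover() prototype means callers that only know the failing engines pass sentinel values for the new parameters, exactly as the gr and runlist paths elsewhere in this patch do. A condensed before/after illustration (not additional code from the change):

/* Before: only an engine mask and verbosity. */
gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true);

/* After: hw_id of ~0 means "query from HW" and hw_id_is_tsg is then
 * ignored, so legacy callers behave as before while new callers can
 * name the faulted channel or TSG explicitly. */
gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), ~(u32)0, false, true);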
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9c201f32..498de7e7 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1388,6 +1388,9 @@ static int gk20a_probe(struct platform_device *dev)
1388 return -ENOMEM; 1388 return -ENOMEM;
1389 } 1389 }
1390 1390
1391 init_waitqueue_head(&gk20a->sw_irq_stall_last_handled_wq);
1392 init_waitqueue_head(&gk20a->sw_irq_nonstall_last_handled_wq);
1393
1391#ifdef CONFIG_PM_GENERIC_DOMAINS_OF 1394#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
1392 gk20a_domain = container_of(dev_to_genpd(&dev->dev), 1395 gk20a_domain = container_of(dev_to_genpd(&dev->dev),
1393 struct gk20a_domain_data, gpd); 1396 struct gk20a_domain_data, gpd);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a52d97f3..d8e3586f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -538,6 +538,15 @@ struct gk20a {
538 u32 max_ltc_count; 538 u32 max_ltc_count;
539 u32 ltc_count; 539 u32 ltc_count;
540 540
541 atomic_t hw_irq_stall_count;
542 atomic_t hw_irq_nonstall_count;
543
544 atomic_t sw_irq_stall_last_handled;
545 wait_queue_head_t sw_irq_stall_last_handled_wq;
546
547 atomic_t sw_irq_nonstall_last_handled;
548 wait_queue_head_t sw_irq_nonstall_last_handled_wq;
549
541 struct devfreq *devfreq; 550 struct devfreq *devfreq;
542 551
543 struct gk20a_scale_profile *scale_profile; 552 struct gk20a_scale_profile *scale_profile;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b2fea5b8..edd4c6c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
5138 * Also used by regops to translate current ctx to chid and tsgid. 5138 * Also used by regops to translate current ctx to chid and tsgid.
5139 * For performance, we don't want to go through 128 channels every time. 5139 * For performance, we don't want to go through 128 channels every time.
5140 * curr_ctx should be the value read from gr_fecs_current_ctx_r(). 5140 * curr_ctx should be the value read from gr_fecs_current_ctx_r().
5141 * A small tlb is used here to cache translation */ 5141 * A small tlb is used here to cache translation.
5142static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, 5142 *
5143 int *curr_tsgid) 5143 * Returned channel must be freed with gk20a_channel_put() */
5144static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5145 struct gk20a *g, u32 curr_ctx, int *curr_tsgid)
5144{ 5146{
5145 struct fifo_gk20a *f = &g->fifo; 5147 struct fifo_gk20a *f = &g->fifo;
5146 struct gr_gk20a *gr = &g->gr; 5148 struct gr_gk20a *gr = &g->gr;
5147 u32 chid = -1; 5149 u32 chid = -1;
5148 int tsgid = NVGPU_INVALID_TSG_ID; 5150 int tsgid = NVGPU_INVALID_TSG_ID;
5149 u32 i; 5151 u32 i;
5152 struct channel_gk20a *ret = NULL;
5150 5153
5151 /* when contexts are unloaded from GR, the valid bit is reset 5154 /* when contexts are unloaded from GR, the valid bit is reset
5152 * but the instance pointer information remains intact. So the 5155 * but the instance pointer information remains intact. So the
5153 * valid bit must be checked to be absolutely certain that a 5156 * valid bit must be checked to be absolutely certain that a
5154 * valid context is currently resident. */ 5157 * valid context is currently resident. */
5155 if (!gr_fecs_current_ctx_valid_v(curr_ctx)) 5158 if (!gr_fecs_current_ctx_valid_v(curr_ctx))
5156 return -1; 5159 return NULL;
5157 5160
5158 spin_lock(&gr->ch_tlb_lock); 5161 spin_lock(&gr->ch_tlb_lock);
5159 5162
@@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
5162 if (gr->chid_tlb[i].curr_ctx == curr_ctx) { 5165 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
5163 chid = gr->chid_tlb[i].hw_chid; 5166 chid = gr->chid_tlb[i].hw_chid;
5164 tsgid = gr->chid_tlb[i].tsgid; 5167 tsgid = gr->chid_tlb[i].tsgid;
5168 ret = gk20a_channel_get(&f->channel[chid]);
5165 goto unlock; 5169 goto unlock;
5166 } 5170 }
5167 } 5171 }
5168 5172
5169 /* slow path */ 5173 /* slow path */
5170 for (chid = 0; chid < f->num_channels; chid++) 5174 for (chid = 0; chid < f->num_channels; chid++) {
5171 if (f->channel[chid].in_use) { 5175 struct channel_gk20a *ch = &f->channel[chid];
5172 if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> 5176 if (!gk20a_channel_get(ch))
5173 ram_in_base_shift_v()) == 5177 continue;
5178
5179 if ((u32)(gk20a_mem_phys(&ch->inst_block) >>
5180 ram_in_base_shift_v()) ==
5174 gr_fecs_current_ctx_ptr_v(curr_ctx)) { 5181 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5175 tsgid = f->channel[chid].tsgid; 5182 tsgid = ch->tsgid;
5176 break; 5183 /* found it */
5177 } 5184 ret = ch;
5185 break;
5186 }
5187 gk20a_channel_put(ch);
5178 } 5188 }
5179 5189
5180 if (chid >= f->num_channels) { 5190 if (!ret)
5181 chid = -1;
5182 goto unlock; 5191 goto unlock;
5183 }
5184 5192
5185 /* add to free tlb entry */ 5193 /* add to free tlb entry */
5186 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { 5194 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5205,7 +5213,7 @@ unlock:
5205 spin_unlock(&gr->ch_tlb_lock); 5213 spin_unlock(&gr->ch_tlb_lock);
5206 if (curr_tsgid) 5214 if (curr_tsgid)
5207 *curr_tsgid = tsgid; 5215 *curr_tsgid = tsgid;
5208 return chid; 5216 return ret;
5209} 5217}
5210 5218
5211int gk20a_gr_lock_down_sm(struct gk20a *g, 5219int gk20a_gr_lock_down_sm(struct gk20a *g,
@@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g)
5399 u32 obj_table; 5407 u32 obj_table;
5400 int need_reset = 0; 5408 int need_reset = 0;
5401 u32 gr_intr = gk20a_readl(g, gr_intr_r()); 5409 u32 gr_intr = gk20a_readl(g, gr_intr_r());
5410 struct channel_gk20a *ch = NULL;
5402 5411
5403 gk20a_dbg_fn(""); 5412 gk20a_dbg_fn("");
5404 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); 5413 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g)
5424 gr_fe_object_table_r(isr_data.sub_chan)) : 0; 5433 gr_fe_object_table_r(isr_data.sub_chan)) : 0;
5425 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); 5434 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
5426 5435
5427 isr_data.chid = 5436 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL);
5428 gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL); 5437 if (!ch) {
5429 if (isr_data.chid == -1) {
5430 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", 5438 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
5431 isr_data.curr_ctx); 5439 isr_data.curr_ctx);
5432 goto clean_up; 5440 goto clean_up;
5433 } 5441 }
5442 isr_data.chid = ch->hw_chid;
5434 5443
5435 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 5444 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5436 "channel %d: addr 0x%08x, " 5445 "channel %d: addr 0x%08x, "
@@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g)
5512 5521
5513 if (gr_intr & gr_intr_exception_pending_f()) { 5522 if (gr_intr & gr_intr_exception_pending_f()) {
5514 u32 exception = gk20a_readl(g, gr_exception_r()); 5523 u32 exception = gk20a_readl(g, gr_exception_r());
5515 struct fifo_gk20a *f = &g->fifo;
5516 struct channel_gk20a *ch = &f->channel[isr_data.chid];
5517 5524
5518 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); 5525 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
5519 5526
@@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g)
5572 } 5579 }
5573 5580
5574 if (need_reset) 5581 if (need_reset)
5575 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true); 5582 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
5583 ~(u32)0, false, true);
5576 5584
5577clean_up: 5585clean_up:
5586 if (gr_intr && !ch) {
5587 /* Clear interrupts for unused channel. This is
5588 probably an interrupt during gk20a_free_channel() */
5589 gk20a_err(dev_from_gk20a(g),
5590 "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing",
5591 gr_intr);
5592 gk20a_writel(g, gr_intr_r(), gr_intr);
5593 gr_intr = 0;
5594 }
5595
5578 gk20a_writel(g, gr_gpfifo_ctl_r(), 5596 gk20a_writel(g, gr_gpfifo_ctl_r(),
5579 grfifo_ctl | gr_gpfifo_ctl_access_f(1) | 5597 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
5580 gr_gpfifo_ctl_semaphore_access_f(1)); 5598 gr_gpfifo_ctl_semaphore_access_f(1));
@@ -5583,6 +5601,9 @@ clean_up:
5583 gk20a_err(dev_from_gk20a(g), 5601 gk20a_err(dev_from_gk20a(g),
5584 "unhandled gr interrupt 0x%08x", gr_intr); 5602 "unhandled gr interrupt 0x%08x", gr_intr);
5585 5603
5604 if (ch)
5605 gk20a_channel_put(ch);
5606
5586 return 0; 5607 return 0;
5587} 5608}
5588 5609
@@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6670 6691
6671bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) 6692bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
6672{ 6693{
6673 int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid; 6694 int curr_gr_ctx, curr_gr_tsgid;
6674 struct gk20a *g = ch->g; 6695 struct gk20a *g = ch->g;
6696 struct channel_gk20a *curr_ch;
6697 bool ret = false;
6675 6698
6676 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); 6699 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
6677 curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx, 6700 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
6678 &curr_gr_tsgid); 6701 &curr_gr_tsgid);
6679 6702
6680 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, 6703 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
6681 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" 6704 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
6682 " ch->hw_chid=%d", curr_gr_chid, 6705 " ch->hw_chid=%d",
6683 curr_gr_tsgid, ch->tsgid, ch->hw_chid); 6706 curr_ch ? curr_ch->hw_chid : -1,
6684 6707 curr_gr_tsgid,
6685 if (curr_gr_chid == -1) 6708 ch->tsgid,
6709 ch->hw_chid);
6710
6711 if (!curr_ch)
6686 return false; 6712 return false;
6687 6713
6688 if (ch->hw_chid == curr_gr_chid) 6714 if (ch->hw_chid == curr_ch->hw_chid)
6689 return true; 6715 ret = true;
6690 6716
6691 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) 6717 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
6692 return true; 6718 ret = true;
6693 6719
6694 return false; 6720 gk20a_channel_put(curr_ch);
6721 return ret;
6695} 6722}
6696 6723
6697int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, 6724int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
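As the rewritten header comment states, gk20a_gr_get_channel_from_ctx() now returns a referenced channel (or NULL) instead of a bare chid, so every caller must balance the lookup with gk20a_channel_put(). A minimal usage sketch, mirroring the ISR and ctx-resident paths above; the function name here is illustrative only, not code from the change.

/* Illustrative usage: translate the current FECS context to a channel,
 * use it, then drop the reference taken by the lookup. */
static void example_use_current_ctx(struct gk20a *g)
{
	u32 curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
	int tsgid;
	struct channel_gk20a *ch =
		gk20a_gr_get_channel_from_ctx(g, curr_ctx, &tsgid);

	if (!ch)
		return;		/* no resident context, or channel is being freed */

	gk20a_dbg(gpu_dbg_intr, "resident ch %d tsg %d", ch->hw_chid, tsgid);

	gk20a_channel_put(ch);	/* mandatory: the lookup took a reference */
}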
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
index 06b00a25..0a773d10 100644
--- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
@@ -40,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
40 /* flush previous write */ 40 /* flush previous write */
41 gk20a_readl(g, mc_intr_en_0_r()); 41 gk20a_readl(g, mc_intr_en_0_r());
42 42
43 atomic_inc(&g->hw_irq_stall_count);
44
43 trace_mc_gk20a_intr_stall_done(g->dev->name); 45 trace_mc_gk20a_intr_stall_done(g->dev->name);
44 46
45 return IRQ_WAKE_THREAD; 47 return IRQ_WAKE_THREAD;
@@ -63,18 +65,22 @@ irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g)
63 /* flush previous write */ 65 /* flush previous write */
64 gk20a_readl(g, mc_intr_en_1_r()); 66 gk20a_readl(g, mc_intr_en_1_r());
65 67
68 atomic_inc(&g->hw_irq_nonstall_count);
69
66 return IRQ_WAKE_THREAD; 70 return IRQ_WAKE_THREAD;
67} 71}
68 72
69irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) 73irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
70{ 74{
71 u32 mc_intr_0; 75 u32 mc_intr_0;
76 int hw_irq_count;
72 77
73 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); 78 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
74 79
75 trace_mc_gk20a_intr_thread_stall(g->dev->name); 80 trace_mc_gk20a_intr_thread_stall(g->dev->name);
76 81
77 mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); 82 mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
83 hw_irq_count = atomic_read(&g->hw_irq_stall_count);
78 84
79 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); 85 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
80 86
@@ -94,12 +100,17 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
94 if (mc_intr_0 & mc_intr_0_pbus_pending_f()) 100 if (mc_intr_0 & mc_intr_0_pbus_pending_f())
95 gk20a_pbus_isr(g); 101 gk20a_pbus_isr(g);
96 102
103 /* sync handled irq counter before re-enabling interrupts */
104 atomic_set(&g->sw_irq_stall_last_handled, hw_irq_count);
105
97 gk20a_writel(g, mc_intr_en_0_r(), 106 gk20a_writel(g, mc_intr_en_0_r(),
98 mc_intr_en_0_inta_hardware_f()); 107 mc_intr_en_0_inta_hardware_f());
99 108
100 /* flush previous write */ 109 /* flush previous write */
101 gk20a_readl(g, mc_intr_en_0_r()); 110 gk20a_readl(g, mc_intr_en_0_r());
102 111
112 wake_up_all(&g->sw_irq_stall_last_handled_wq);
113
103 trace_mc_gk20a_intr_thread_stall_done(g->dev->name); 114 trace_mc_gk20a_intr_thread_stall_done(g->dev->name);
104 115
105 return IRQ_HANDLED; 116 return IRQ_HANDLED;
@@ -108,10 +119,12 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
108irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) 119irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
109{ 120{
110 u32 mc_intr_1; 121 u32 mc_intr_1;
122 int hw_irq_count;
111 123
112 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); 124 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
113 125
114 mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); 126 mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
127 hw_irq_count = atomic_read(&g->hw_irq_nonstall_count);
115 128
116 gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); 129 gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
117 130
@@ -125,12 +138,17 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
125 && g->ops.ce2.isr_nonstall) 138 && g->ops.ce2.isr_nonstall)
126 g->ops.ce2.isr_nonstall(g); 139 g->ops.ce2.isr_nonstall(g);
127 140
141 /* sync handled irq counter before re-enabling interrupts */
142 atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count);
143
128 gk20a_writel(g, mc_intr_en_1_r(), 144 gk20a_writel(g, mc_intr_en_1_r(),
129 mc_intr_en_1_inta_hardware_f()); 145 mc_intr_en_1_inta_hardware_f());
130 146
131 /* flush previous write */ 147 /* flush previous write */
132 gk20a_readl(g, mc_intr_en_1_r()); 148 gk20a_readl(g, mc_intr_en_1_r());
133 149
 150 wake_up_all(&g->sw_irq_nonstall_last_handled_wq);
151
134 return IRQ_HANDLED; 152 return IRQ_HANDLED;
135} 153}
136 154
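Taken together, the mc changes give each interrupt line a producer counter (hw_irq_*_count, bumped in the hard ISR) and a consumer counter (sw_irq_*_last_handled, set from a snapshot by the threaded handler before interrupts are re-enabled). The evident purpose is to let other code wait until every interrupt raised so far has been fully processed, along the lines of the hypothetical helper below; the waiting side itself is not part of this diff.

/* Hypothetical quiesce helper built on the counters added above: wait
 * until the threaded handlers have caught up with every hard IRQ that
 * had fired by the time the counters were sampled (wrap ignored for
 * brevity). */
static void wait_for_handled_interrupts(struct gk20a *g)
{
	int stall_seen = atomic_read(&g->hw_irq_stall_count);
	int nonstall_seen = atomic_read(&g->hw_irq_nonstall_count);

	wait_event(g->sw_irq_stall_last_handled_wq,
		   atomic_read(&g->sw_irq_stall_last_handled) >= stall_seen);
	wait_event(g->sw_irq_nonstall_last_handled_wq,
		   atomic_read(&g->sw_irq_nonstall_last_handled) >= nonstall_seen);
}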
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 68a31eca..23ff8677 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -283,6 +283,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
283 283
284 init_runlist(g, f); 284 init_runlist(g, f);
285 285
286 INIT_LIST_HEAD(&f->free_chs);
287 mutex_init(&f->free_chs_mutex);
288
286 for (chid = 0; chid < f->num_channels; chid++) { 289 for (chid = 0; chid < f->num_channels; chid++) {
287 f->channel[chid].userd_cpu_va = 290 f->channel[chid].userd_cpu_va =
288 f->userd.cpu_va + chid * f->userd_entry_size; 291 f->userd.cpu_va + chid * f->userd_entry_size;
@@ -294,7 +297,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
294 297
295 gk20a_init_channel_support(g, chid); 298 gk20a_init_channel_support(g, chid);
296 } 299 }
297 mutex_init(&f->ch_inuse_mutex);
298 300
299 f->deferred_reset_pending = false; 301 f->deferred_reset_pending = false;
300 mutex_init(&f->deferred_reset_mutex); 302 mutex_init(&f->deferred_reset_mutex);
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index ad738f43..461ff6e8 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -140,12 +140,54 @@ DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done,
140 TP_ARGS(name) 140 TP_ARGS(name)
141); 141);
142 142
143TRACE_EVENT(gk20a_channel_update, 143DECLARE_EVENT_CLASS(gk20a_channel,
144 TP_PROTO(const void *channel), 144 TP_PROTO(int channel),
145 TP_ARGS(channel), 145 TP_ARGS(channel),
146 TP_STRUCT__entry(__field(const void *, channel)), 146 TP_STRUCT__entry(__field(int, channel)),
147 TP_fast_assign(__entry->channel = channel;), 147 TP_fast_assign(__entry->channel = channel;),
148 TP_printk("channel=%p", __entry->channel) 148 TP_printk("ch id %d", __entry->channel)
149);
150DEFINE_EVENT(gk20a_channel, gk20a_channel_update,
151 TP_PROTO(int channel),
152 TP_ARGS(channel)
153);
154DEFINE_EVENT(gk20a_channel, gk20a_free_channel,
155 TP_PROTO(int channel),
156 TP_ARGS(channel)
157);
158DEFINE_EVENT(gk20a_channel, gk20a_open_new_channel,
159 TP_PROTO(int channel),
160 TP_ARGS(channel)
161);
162DEFINE_EVENT(gk20a_channel, gk20a_release_used_channel,
163 TP_PROTO(int channel),
164 TP_ARGS(channel)
165);
166
167DECLARE_EVENT_CLASS(gk20a_channel_getput,
168 TP_PROTO(int channel, const char *caller),
169 TP_ARGS(channel, caller),
170 TP_STRUCT__entry(
171 __field(int, channel)
172 __field(const char *, caller)
173 ),
174 TP_fast_assign(
175 __entry->channel = channel;
176 __entry->caller = caller;
177 ),
178 TP_printk("channel %d caller %s", __entry->channel, __entry->caller)
179);
180DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_get,
181 TP_PROTO(int channel, const char *caller),
182 TP_ARGS(channel, caller)
183);
184DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put,
185 TP_PROTO(int channel, const char *caller),
186 TP_ARGS(channel, caller)
187);
188DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree,
189 TP_PROTO(int channel, const char *caller),
190 TP_ARGS(channel, caller)
149); 191);
150 192
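The reworked events record a channel id instead of a raw pointer, and the gk20a_channel_getput class additionally records the call site. The refcounting helpers in channel_gk20a.c (outside this hunk) are presumably instrumented along these lines; the function below is illustrative only, with __func__ assumed as the "caller" string.

/* Illustrative only: emitting the events defined above across a
 * channel's lifetime. */
static void example_trace_channel_lifetime(struct channel_gk20a *ch)
{
	trace_gk20a_open_new_channel(ch->hw_chid);

	if (gk20a_channel_get(ch)) {
		trace_gk20a_channel_get(ch->hw_chid, __func__);
		trace_gk20a_channel_put(ch->hw_chid, __func__);
		gk20a_channel_put(ch);
	}

	trace_gk20a_free_channel(ch->hw_chid);
}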
151TRACE_EVENT(gk20a_push_cmdbuf, 193TRACE_EVENT(gk20a_push_cmdbuf,