author     Konsta Holtta <kholtta@nvidia.com>       2015-03-06 09:33:43 -0500
committer  Terje Bergstrom <tbergstrom@nvidia.com>  2015-06-09 14:13:43 -0400
commit     6085c90f499c642bc41a646b0efbdfe60e096c74 (patch)
tree       0eaab99b228ce162ec3a44d0f8138b441f5a64f4
parent     a41e5c41cadaa3d030a1f75b09328b8b1a440b69 (diff)
gpu: nvgpu: add per-channel refcounting
Add reference counting for channels, and wait for the reference count to
reach 0 in gk20a_channel_free() before actually freeing the channel. Also,
change free-channel tracking a bit by employing a list of free channels,
which simplifies the procedure of finding available channels with reference
counting.

Each use of a channel must have a reference taken before use or held by the
caller. Taking a reference of a wild channel pointer may fail if the channel
is either not opened or in the process of being closed. Also, add safeguards
against accidental use of closed channels, specifically by setting
ch->g = NULL in channel free. This makes it obvious if a freed channel is
accidentally used.

The last user of a channel might be the deferred interrupt handler, so wait
for deferred interrupts to be processed twice in the channel free procedure:
once to deliver the last notifications to the channel and once to make sure
there are no stale pointers left after new references to the channel have
been denied.

Finally, fix some races in the channel and TSG force-reset IOCTL path by
pausing the channel scheduler in gk20a_fifo_recover_ch() and
gk20a_fifo_recover_tsg() while the affected engines are identified, the
appropriate MMU faults triggered, and the MMU faults handled. In this case,
make sure that the MMU fault handler does not attempt to query the hardware
about the failing channel or TSG ids. This should make channel recovery
safer also in the regular (i.e., not interrupt handler) context.

Bug 1530226
Bug 1597493
Bug 1625901
Bug 200076344
Bug 200071810

Change-Id: Ib274876908e18219c64ea41e50ca443df81d957b
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/448463
(cherry picked from commit 3f03aeae64ef2af4829e06f5f63062e8ebd21353)
Reviewed-on: http://git-master/r/755147
Reviewed-by: Automatic_Commit_Validation_User
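For clarity, here is a minimal sketch of the caller-side discipline this
patch establishes, using the gk20a_channel_get()/gk20a_channel_put() helpers
it adds. The wrapper function name is invented for illustration; only the
get/put calls and the channel fields come from the patch.

/*
 * Illustrative sketch only (not part of the patch): how code that finds a
 * channel by hw_chid, rather than owning a file reference, is expected to
 * use the new refcounting API.
 */
static void example_signal_channel(struct gk20a *g, u32 hw_chid)
{
	struct channel_gk20a *ch = &g->fifo.channel[hw_chid];

	/* may return NULL if the channel is unopened or being closed */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return;

	/* the channel cannot be freed while this reference is held */
	gk20a_channel_event(ch);
	wake_up_interruptible_all(&ch->semaphore_wq);

	/* the close path waits for the count to drop back to zero */
	gk20a_channel_put(ch);
}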
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c           |   4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       | 302
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |  32
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  |  58
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c         |  34
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          | 247
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h          |  15
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c               |   3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h               |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |  93
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mc_gk20a.c            |  18
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c            |   4
-rw-r--r--  include/trace/events/gk20a.h                  |  50
13 files changed, 681 insertions, 188 deletions
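Before the per-file diffs, a condensed sketch of the new free-list
allocation scheme described above. The names match the patch; the WARN_ONs
and tracing present in the real code are omitted.

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
{
	struct channel_gk20a *ch = NULL;

	mutex_lock(&f->free_chs_mutex);
	if (!list_empty(&f->free_chs)) {
		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
				      free_chs);
		list_del(&ch->free_chs);
	}
	mutex_unlock(&f->free_chs_mutex);

	return ch;	/* NULL means out of hw chids */
}

static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *ch)
{
	mutex_lock(&f->free_chs_mutex);
	/* add to the head so recently freed channels are reused first,
	 * which makes timing-related bugs easier to hit and diagnose */
	list_add(&ch->free_chs, &f->free_chs);
	mutex_unlock(&f->free_chs_mutex);
}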
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 4a3076b5..b4fdfb44 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Color decompression engine support 2 * Color decompression engine support
3 * 3 *
4 * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. 4 * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -74,7 +74,7 @@ __must_hold(&cde_app->mutex)
74 trace_gk20a_cde_remove_ctx(cde_ctx); 74 trace_gk20a_cde_remove_ctx(cde_ctx);
75 75
76 /* free the channel */ 76 /* free the channel */
77 gk20a_free_channel(cde_ctx->ch, true); 77 gk20a_channel_close(ch);
78 78
79 /* ..then release mapped memory */ 79 /* ..then release mapped memory */
80 gk20a_deinit_cde_img(cde_ctx); 80 gk20a_deinit_cde_img(cde_ctx);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c12f196d..5a71e874 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -42,8 +42,8 @@
42 42
43#define NVMAP_HANDLE_PARAM_SIZE 1 43#define NVMAP_HANDLE_PARAM_SIZE 1
44 44
45static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f); 45static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
46static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c); 46static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
47 47
48static void free_priv_cmdbuf(struct channel_gk20a *c, 48static void free_priv_cmdbuf(struct channel_gk20a *c,
49 struct priv_cmd_entry *e); 49 struct priv_cmd_entry *e);
@@ -61,29 +61,33 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c,
61 bool add); 61 bool add);
62static void gk20a_free_error_notifiers(struct channel_gk20a *ch); 62static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
63 63
64static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f) 64/* allocate GPU channel */
65static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
65{ 66{
66 struct channel_gk20a *ch = NULL; 67 struct channel_gk20a *ch = NULL;
67 int chid;
68 68
69 mutex_lock(&f->ch_inuse_mutex); 69 mutex_lock(&f->free_chs_mutex);
70 for (chid = 0; chid < f->num_channels; chid++) { 70 if (!list_empty(&f->free_chs)) {
71 if (!f->channel[chid].in_use) { 71 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
72 f->channel[chid].in_use = true; 72 free_chs);
73 ch = &f->channel[chid]; 73 list_del(&ch->free_chs);
74 break; 74 WARN_ON(atomic_read(&ch->ref_count));
75 } 75 WARN_ON(ch->referenceable);
76 } 76 }
77 mutex_unlock(&f->ch_inuse_mutex); 77 mutex_unlock(&f->free_chs_mutex);
78 78
79 return ch; 79 return ch;
80} 80}
81 81
82static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c) 82static void free_channel(struct fifo_gk20a *f,
83 struct channel_gk20a *ch)
83{ 84{
84 mutex_lock(&f->ch_inuse_mutex); 85 trace_gk20a_release_used_channel(ch->hw_chid);
85 f->channel[c->hw_chid].in_use = false; 86 /* refcount is zero here and channel is in a freed/dead state */
86 mutex_unlock(&f->ch_inuse_mutex); 87 mutex_lock(&f->free_chs_mutex);
88 /* add to head to increase visibility of timing-related bugs */
89 list_add(&ch->free_chs, &f->free_chs);
90 mutex_unlock(&f->free_chs_mutex);
87} 91}
88 92
89int channel_gk20a_commit_va(struct channel_gk20a *c) 93int channel_gk20a_commit_va(struct channel_gk20a *c)
@@ -361,6 +365,11 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
361 struct channel_gk20a_job *job, *n; 365 struct channel_gk20a_job *job, *n;
362 bool released_job_semaphore = false; 366 bool released_job_semaphore = false;
363 367
368 gk20a_dbg_fn("");
369
370 /* make sure new kickoffs are prevented */
371 ch->has_timedout = true;
372
364 /* ensure no fences are pending */ 373 /* ensure no fences are pending */
365 mutex_lock(&ch->submit_lock); 374 mutex_lock(&ch->submit_lock);
366 if (ch->sync) 375 if (ch->sync)
@@ -416,6 +425,8 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
416 bool finish, 425 bool finish,
417 unsigned long finish_timeout) 426 unsigned long finish_timeout)
418{ 427{
428 gk20a_dbg_fn("");
429
419 if (finish) { 430 if (finish) {
420 int err = gk20a_channel_finish(ch, finish_timeout); 431 int err = gk20a_channel_finish(ch, finish_timeout);
421 WARN_ON(err); 432 WARN_ON(err);
@@ -627,8 +638,9 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
627 (u32)(nsec >> 32); 638 (u32)(nsec >> 32);
628 ch->error_notifier->info32 = error; 639 ch->error_notifier->info32 = error;
629 ch->error_notifier->status = 0xffff; 640 ch->error_notifier->status = 0xffff;
641
630 gk20a_err(dev_from_gk20a(ch->g), 642 gk20a_err(dev_from_gk20a(ch->g),
631 "error notifier set to %d for ch %d\n", error, ch->hw_chid); 643 "error notifier set to %d for ch %d", error, ch->hw_chid);
632 } 644 }
633} 645}
634 646
@@ -643,7 +655,53 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
643 } 655 }
644} 656}
645 657
646void gk20a_free_channel(struct channel_gk20a *ch, bool finish) 658/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
659 * is positive */
660static int cyclic_delta(int a, int b)
661{
662 return a - b;
663}
664
665static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
666{
667 int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
668 int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
669
670 /* wait until all stalling irqs are handled */
671 wait_event(g->sw_irq_stall_last_handled_wq,
672 cyclic_delta(stall_irq_threshold,
673 atomic_read(&g->sw_irq_stall_last_handled))
674 <= 0);
675
676 /* wait until all non-stalling irqs are handled */
677 wait_event(g->sw_irq_nonstall_last_handled_wq,
678 cyclic_delta(nonstall_irq_threshold,
679 atomic_read(&g->sw_irq_nonstall_last_handled))
680 <= 0);
681}
682
683static void gk20a_wait_until_counter_is_N(
684 struct channel_gk20a *ch, atomic_t *counter, int wait_value,
685 wait_queue_head_t *wq, const char *caller, const char *counter_name)
686{
687 while (true) {
688 if (wait_event_timeout(
689 *wq,
690 atomic_read(counter) == wait_value,
691 msecs_to_jiffies(5000)) > 0)
692 break;
693
694 gk20a_warn(dev_from_gk20a(ch->g),
695 "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
696 caller, ch->hw_chid, counter_name,
697 atomic_read(counter), wait_value);
698 }
699}
700
701
702
703/* call ONLY when no references to the channel exist: after the last put */
704static void gk20a_free_channel(struct channel_gk20a *ch)
647{ 705{
648 struct gk20a *g = ch->g; 706 struct gk20a *g = ch->g;
649 struct fifo_gk20a *f = &g->fifo; 707 struct fifo_gk20a *f = &g->fifo;
@@ -654,13 +712,50 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
654 712
655 gk20a_dbg_fn(""); 713 gk20a_dbg_fn("");
656 714
715 WARN_ON(ch->g == NULL);
716
717 trace_gk20a_free_channel(ch->hw_chid);
718
719 /* prevent new kickoffs */
720 ch->has_timedout = true;
721 wmb();
722
723 /* wait until there's only our ref to the channel */
724 gk20a_wait_until_counter_is_N(
725 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
726 __func__, "references");
727
728 /* wait until all pending interrupts for recently completed
729 * jobs are handled */
730 gk20a_wait_for_deferred_interrupts(g);
731
732 /* prevent new refs */
733 spin_lock(&ch->ref_obtain_lock);
734 if (!ch->referenceable) {
735 spin_unlock(&ch->ref_obtain_lock);
736 gk20a_err(dev_from_gk20a(ch->g),
737 "Extra %s() called to channel %u",
738 __func__, ch->hw_chid);
739 return;
740 }
741 ch->referenceable = false;
742 spin_unlock(&ch->ref_obtain_lock);
743
744 /* matches with the initial reference in gk20a_open_new_channel() */
745 atomic_dec(&ch->ref_count);
746
747 /* wait until no more refs to the channel */
748 gk20a_wait_until_counter_is_N(
749 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
750 __func__, "references");
751
657 /* if engine reset was deferred, perform it now */ 752 /* if engine reset was deferred, perform it now */
658 mutex_lock(&f->deferred_reset_mutex); 753 mutex_lock(&f->deferred_reset_mutex);
659 if (g->fifo.deferred_reset_pending) { 754 if (g->fifo.deferred_reset_pending) {
660 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" 755 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
661 " deferred, running now"); 756 " deferred, running now");
662 gk20a_fifo_reset_engine(g, g->fifo.mmu_fault_engines); 757 gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
663 g->fifo.mmu_fault_engines = 0; 758 g->fifo.deferred_fault_engines = 0;
664 g->fifo.deferred_reset_pending = false; 759 g->fifo.deferred_reset_pending = false;
665 } 760 }
666 mutex_unlock(&f->deferred_reset_mutex); 761 mutex_unlock(&f->deferred_reset_mutex);
@@ -674,7 +769,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
674 gk20a_dbg_info("freeing bound channel context, timeout=%ld", 769 gk20a_dbg_info("freeing bound channel context, timeout=%ld",
675 timeout); 770 timeout);
676 771
677 gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout); 772 gk20a_disable_channel(ch, !ch->has_timedout, timeout);
678 773
679 gk20a_free_error_notifiers(ch); 774 gk20a_free_error_notifiers(ch);
680 775
@@ -714,6 +809,10 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
714 spin_unlock(&ch->update_fn_lock); 809 spin_unlock(&ch->update_fn_lock);
715 cancel_work_sync(&ch->update_fn_work); 810 cancel_work_sync(&ch->update_fn_work);
716 811
812 /* make sure we don't have deferred interrupts pending that
813 * could still touch the channel */
814 gk20a_wait_for_deferred_interrupts(g);
815
717unbind: 816unbind:
718 if (gk20a_is_channel_marked_as_tsg(ch)) 817 if (gk20a_is_channel_marked_as_tsg(ch))
719 gk20a_tsg_unbind_channel(ch); 818 gk20a_tsg_unbind_channel(ch);
@@ -743,8 +842,66 @@ unbind:
743 mutex_unlock(&ch->dbg_s_lock); 842 mutex_unlock(&ch->dbg_s_lock);
744 843
745release: 844release:
845 /* make sure we catch accesses of unopened channels in case
846 * there's non-refcounted channel pointers hanging around */
847 ch->g = NULL;
848 wmb();
849
746 /* ALWAYS last */ 850 /* ALWAYS last */
747 release_used_channel(f, ch); 851 free_channel(f, ch);
852}
853
854/* Try to get a reference to the channel. Return nonzero on success. If fails,
855 * the channel is dead or being freed elsewhere and you must not touch it.
856 *
857 * Always when a channel_gk20a pointer is seen and about to be used, a
858 * reference must be held to it - either by you or the caller, which should be
859 * documented well or otherwise clearly seen. This usually boils down to the
860 * file from ioctls directly, or an explicit get in exception handlers when the
861 * channel is found by a hw_chid.
862 *
863 * Most global functions in this file require a reference to be held by the
864 * caller.
865 */
866struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
867 const char *caller) {
868 struct channel_gk20a *ret;
869
870 spin_lock(&ch->ref_obtain_lock);
871
872 if (likely(ch->referenceable)) {
873 atomic_inc(&ch->ref_count);
874 ret = ch;
875 } else
876 ret = NULL;
877
878 spin_unlock(&ch->ref_obtain_lock);
879
880 if (ret)
881 trace_gk20a_channel_get(ch->hw_chid, caller);
882
883 return ret;
884}
885
886void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
887{
888 trace_gk20a_channel_put(ch->hw_chid, caller);
889 atomic_dec(&ch->ref_count);
890 wake_up_all(&ch->ref_count_dec_wq);
891
892 /* More puts than gets. Channel is probably going to get
893 * stuck. */
894 WARN_ON(atomic_read(&ch->ref_count) < 0);
895
896 /* Also, more puts than gets. ref_count can go to 0 only if
897 * the channel is closing. Channel is probably going to get
898 * stuck. */
899 WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
900}
901
902void gk20a_channel_close(struct channel_gk20a *ch)
903{
904 gk20a_free_channel(ch);
748} 905}
749 906
750int gk20a_channel_release(struct inode *inode, struct file *filp) 907int gk20a_channel_release(struct inode *inode, struct file *filp)
@@ -758,14 +915,14 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
758 915
759 trace_gk20a_channel_release(dev_name(&g->dev->dev)); 916 trace_gk20a_channel_release(dev_name(&g->dev->dev));
760 917
761 err = gk20a_busy(ch->g->dev); 918 err = gk20a_busy(g->dev);
762 if (err) { 919 if (err) {
763 gk20a_err(dev_from_gk20a(g), "failed to release channel %d", 920 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
764 ch->hw_chid); 921 ch->hw_chid);
765 return err; 922 return err;
766 } 923 }
767 gk20a_free_channel(ch, true); 924 gk20a_channel_close(ch);
768 gk20a_idle(ch->g->dev); 925 gk20a_idle(g->dev);
769 926
770 filp->private_data = NULL; 927 filp->private_data = NULL;
771 return 0; 928 return 0;
@@ -808,22 +965,31 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
808 struct fifo_gk20a *f = &g->fifo; 965 struct fifo_gk20a *f = &g->fifo;
809 struct channel_gk20a *ch; 966 struct channel_gk20a *ch;
810 967
811 ch = acquire_unused_channel(f); 968 gk20a_dbg_fn("");
969
970 ch = allocate_channel(f);
812 if (ch == NULL) { 971 if (ch == NULL) {
813 /* TBD: we want to make this virtualizable */ 972 /* TBD: we want to make this virtualizable */
814 gk20a_err(dev_from_gk20a(g), "out of hw chids"); 973 gk20a_err(dev_from_gk20a(g), "out of hw chids");
815 return NULL; 974 return NULL;
816 } 975 }
817 976
977 trace_gk20a_open_new_channel(ch->hw_chid);
978
979 BUG_ON(ch->g);
818 ch->g = g; 980 ch->g = g;
819 981
820 if (g->ops.fifo.alloc_inst(g, ch)) { 982 if (g->ops.fifo.alloc_inst(g, ch)) {
821 ch->in_use = false; 983 ch->g = NULL;
984 free_channel(f, ch);
822 gk20a_err(dev_from_gk20a(g), 985 gk20a_err(dev_from_gk20a(g),
823 "failed to open gk20a channel, out of inst mem"); 986 "failed to open gk20a channel, out of inst mem");
824
825 return NULL; 987 return NULL;
826 } 988 }
989
990 /* now the channel is in a limbo out of the free list but not marked as
991 * alive and used (i.e. get-able) yet */
992
827 ch->pid = current->pid; 993 ch->pid = current->pid;
828 994
829 /* By default, channel is regular (non-TSG) channel */ 995 /* By default, channel is regular (non-TSG) channel */
@@ -854,6 +1020,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
854 spin_lock_init(&ch->update_fn_lock); 1020 spin_lock_init(&ch->update_fn_lock);
855 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); 1021 INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
856 1022
1023 /* Mark the channel alive, get-able, with 1 initial use
1024 * references. The initial reference will be decreased in
1025 * gk20a_free_channel() */
1026 ch->referenceable = true;
1027 atomic_set(&ch->ref_count, 1);
1028 wmb();
1029
857 return ch; 1030 return ch;
858} 1031}
859 1032
@@ -1379,7 +1552,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1379 struct mapped_buffer_node **mapped_buffers = NULL; 1552 struct mapped_buffer_node **mapped_buffers = NULL;
1380 int err = 0, num_mapped_buffers; 1553 int err = 0, num_mapped_buffers;
1381 1554
1382 /* job needs reference to this vm */ 1555 /* job needs reference to this vm (released in channel_update) */
1383 gk20a_vm_get(vm); 1556 gk20a_vm_get(vm);
1384 1557
1385 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); 1558 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
@@ -1395,14 +1568,21 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1395 return -ENOMEM; 1568 return -ENOMEM;
1396 } 1569 }
1397 1570
1398 job->num_mapped_buffers = num_mapped_buffers; 1571 /* put() is done in gk20a_channel_update() when the job is done */
1399 job->mapped_buffers = mapped_buffers; 1572 c = gk20a_channel_get(c);
1400 job->pre_fence = gk20a_fence_get(pre_fence);
1401 job->post_fence = gk20a_fence_get(post_fence);
1402 1573
1403 mutex_lock(&c->jobs_lock); 1574 if (c) {
1404 list_add_tail(&job->list, &c->jobs); 1575 job->num_mapped_buffers = num_mapped_buffers;
1405 mutex_unlock(&c->jobs_lock); 1576 job->mapped_buffers = mapped_buffers;
1577 job->pre_fence = gk20a_fence_get(pre_fence);
1578 job->post_fence = gk20a_fence_get(post_fence);
1579
1580 mutex_lock(&c->jobs_lock);
1581 list_add_tail(&job->list, &c->jobs);
1582 mutex_unlock(&c->jobs_lock);
1583 } else {
1584 return -ETIMEDOUT;
1585 }
1406 1586
1407 return 0; 1587 return 0;
1408} 1588}
@@ -1412,13 +1592,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1412 struct vm_gk20a *vm = c->vm; 1592 struct vm_gk20a *vm = c->vm;
1413 struct channel_gk20a_job *job, *n; 1593 struct channel_gk20a_job *job, *n;
1414 1594
1415 trace_gk20a_channel_update(c); 1595 trace_gk20a_channel_update(c->hw_chid);
1416 1596
1417 wake_up(&c->submit_wq); 1597 wake_up(&c->submit_wq);
1418 1598
1419 mutex_lock(&c->submit_lock); 1599 mutex_lock(&c->submit_lock);
1420 mutex_lock(&c->jobs_lock); 1600 mutex_lock(&c->jobs_lock);
1421 list_for_each_entry_safe(job, n, &c->jobs, list) { 1601 list_for_each_entry_safe(job, n, &c->jobs, list) {
1602 struct gk20a *g = c->g;
1603
1422 bool completed = gk20a_fence_is_expired(job->post_fence); 1604 bool completed = gk20a_fence_is_expired(job->post_fence);
1423 if (!completed) 1605 if (!completed)
1424 break; 1606 break;
@@ -1434,12 +1616,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1434 gk20a_fence_put(job->pre_fence); 1616 gk20a_fence_put(job->pre_fence);
1435 gk20a_fence_put(job->post_fence); 1617 gk20a_fence_put(job->post_fence);
1436 1618
1437 /* job is done. release its reference to vm */ 1619 /* job is done. release its vm reference (taken in add_job) */
1438 gk20a_vm_put(vm); 1620 gk20a_vm_put(vm);
1621 /* another bookkeeping taken in add_job. caller must hold a ref
1622 * so this wouldn't get freed here. */
1623 gk20a_channel_put(c);
1439 1624
1440 list_del_init(&job->list); 1625 list_del_init(&job->list);
1441 kfree(job); 1626 kfree(job);
1442 gk20a_idle(c->g->dev); 1627 gk20a_idle(g->dev);
1443 } 1628 }
1444 1629
1445 /* 1630 /*
@@ -1719,10 +1904,13 @@ clean_up:
1719int gk20a_init_channel_support(struct gk20a *g, u32 chid) 1904int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1720{ 1905{
1721 struct channel_gk20a *c = g->fifo.channel+chid; 1906 struct channel_gk20a *c = g->fifo.channel+chid;
1722 c->g = g; 1907 c->g = NULL;
1723 c->in_use = false;
1724 c->hw_chid = chid; 1908 c->hw_chid = chid;
1725 c->bound = false; 1909 c->bound = false;
1910 spin_lock_init(&c->ref_obtain_lock);
1911 atomic_set(&c->ref_count, 0);
1912 c->referenceable = false;
1913 init_waitqueue_head(&c->ref_count_dec_wq);
1726 mutex_init(&c->ioctl_lock); 1914 mutex_init(&c->ioctl_lock);
1727 mutex_init(&c->jobs_lock); 1915 mutex_init(&c->jobs_lock);
1728 mutex_init(&c->submit_lock); 1916 mutex_init(&c->submit_lock);
@@ -1733,6 +1921,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1733#endif 1921#endif
1734 INIT_LIST_HEAD(&c->dbg_s_list); 1922 INIT_LIST_HEAD(&c->dbg_s_list);
1735 mutex_init(&c->dbg_s_lock); 1923 mutex_init(&c->dbg_s_lock);
1924 list_add(&c->free_chs, &g->fifo.free_chs);
1736 1925
1737 return 0; 1926 return 0;
1738} 1927}
@@ -2066,8 +2255,7 @@ int gk20a_channel_suspend(struct gk20a *g)
2066 2255
2067 for (chid = 0; chid < f->num_channels; chid++) { 2256 for (chid = 0; chid < f->num_channels; chid++) {
2068 struct channel_gk20a *ch = &f->channel[chid]; 2257 struct channel_gk20a *ch = &f->channel[chid];
2069 if (ch->in_use) { 2258 if (gk20a_channel_get(ch)) {
2070
2071 gk20a_dbg_info("suspend channel %d", chid); 2259 gk20a_dbg_info("suspend channel %d", chid);
2072 /* disable channel */ 2260 /* disable channel */
2073 g->ops.fifo.disable_channel(ch); 2261 g->ops.fifo.disable_channel(ch);
@@ -2079,6 +2267,8 @@ int gk20a_channel_suspend(struct gk20a *g)
2079 flush_work(&ch->update_fn_work); 2267 flush_work(&ch->update_fn_work);
2080 2268
2081 channels_in_use = true; 2269 channels_in_use = true;
2270
2271 gk20a_channel_put(ch);
2082 } 2272 }
2083 } 2273 }
2084 2274
@@ -2086,8 +2276,10 @@ int gk20a_channel_suspend(struct gk20a *g)
2086 g->ops.fifo.update_runlist(g, 0, ~0, false, true); 2276 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2087 2277
2088 for (chid = 0; chid < f->num_channels; chid++) { 2278 for (chid = 0; chid < f->num_channels; chid++) {
2089 if (f->channel[chid].in_use) 2279 if (gk20a_channel_get(&f->channel[chid])) {
2090 g->ops.fifo.unbind_channel(&f->channel[chid]); 2280 g->ops.fifo.unbind_channel(&f->channel[chid]);
2281 gk20a_channel_put(&f->channel[chid]);
2282 }
2091 } 2283 }
2092 } 2284 }
2093 2285
@@ -2095,8 +2287,6 @@ int gk20a_channel_suspend(struct gk20a *g)
2095 return 0; 2287 return 0;
2096} 2288}
2097 2289
2098/* in this context the "channel" is the host1x channel which
2099 * maps to *all* gk20a channels */
2100int gk20a_channel_resume(struct gk20a *g) 2290int gk20a_channel_resume(struct gk20a *g)
2101{ 2291{
2102 struct fifo_gk20a *f = &g->fifo; 2292 struct fifo_gk20a *f = &g->fifo;
@@ -2106,10 +2296,11 @@ int gk20a_channel_resume(struct gk20a *g)
2106 gk20a_dbg_fn(""); 2296 gk20a_dbg_fn("");
2107 2297
2108 for (chid = 0; chid < f->num_channels; chid++) { 2298 for (chid = 0; chid < f->num_channels; chid++) {
2109 if (f->channel[chid].in_use) { 2299 if (gk20a_channel_get(&f->channel[chid])) {
2110 gk20a_dbg_info("resume channel %d", chid); 2300 gk20a_dbg_info("resume channel %d", chid);
2111 g->ops.fifo.bind_channel(&f->channel[chid]); 2301 g->ops.fifo.bind_channel(&f->channel[chid]);
2112 channels_in_use = true; 2302 channels_in_use = true;
2303 gk20a_channel_put(&f->channel[chid]);
2113 } 2304 }
2114 } 2305 }
2115 2306
@@ -2129,10 +2320,11 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2129 2320
2130 for (chid = 0; chid < f->num_channels; chid++) { 2321 for (chid = 0; chid < f->num_channels; chid++) {
2131 struct channel_gk20a *c = g->fifo.channel+chid; 2322 struct channel_gk20a *c = g->fifo.channel+chid;
2132 if (c->in_use) { 2323 if (gk20a_channel_get(c)) {
2133 gk20a_channel_event(c); 2324 gk20a_channel_event(c);
2134 wake_up_interruptible_all(&c->semaphore_wq); 2325 wake_up_interruptible_all(&c->semaphore_wq);
2135 gk20a_channel_update(c, 0); 2326 gk20a_channel_update(c, 0);
2327 gk20a_channel_put(c);
2136 } 2328 }
2137 } 2329 }
2138} 2330}
@@ -2225,10 +2417,18 @@ long gk20a_channel_ioctl(struct file *filp,
2225 return -EFAULT; 2417 return -EFAULT;
2226 } 2418 }
2227 2419
2420 /* take a ref or return timeout if channel refs can't be taken */
2421 ch = gk20a_channel_get(ch);
2422 if (!ch)
2423 return -ETIMEDOUT;
2424
2228 /* protect our sanity for threaded userspace - most of the channel is 2425 /* protect our sanity for threaded userspace - most of the channel is
2229 * not thread safe */ 2426 * not thread safe */
2230 mutex_lock(&ch->ioctl_lock); 2427 mutex_lock(&ch->ioctl_lock);
2231 2428
2429 /* this ioctl call keeps a ref to the file which keeps a ref to the
2430 * channel */
2431
2232 switch (cmd) { 2432 switch (cmd) {
2233 case NVGPU_IOCTL_CHANNEL_OPEN: 2433 case NVGPU_IOCTL_CHANNEL_OPEN:
2234 err = gk20a_channel_open_ioctl(ch->g, 2434 err = gk20a_channel_open_ioctl(ch->g,
@@ -2449,9 +2649,11 @@ long gk20a_channel_ioctl(struct file *filp,
2449 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 2649 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2450 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); 2650 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2451 2651
2452 gk20a_dbg_fn("end");
2453
2454 mutex_unlock(&ch->ioctl_lock); 2652 mutex_unlock(&ch->ioctl_lock);
2455 2653
2654 gk20a_channel_put(ch);
2655
2656 gk20a_dbg_fn("end");
2657
2456 return err; 2658 return err;
2457} 2659}
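Note on cyclic_delta() above: the deferred-interrupt wait snapshots the
hardware irq counters and then waits until the software handled-counters
catch up, comparing them with a wrapping subtraction so the wait terminates
correctly even if the atomic counters wrap around. A small sketch of that
check, assuming the kernel's usual wrapping integer semantics; the helper
name is invented for illustration, while the counter fields come from the
patch.

/* positive if a is ahead of b, also across a counter wrap */
static int cyclic_delta(int a, int b)
{
	return a - b;
}

/* snapshot = atomic_read(&g->hw_irq_stall_count) taken before waiting */
static bool stall_irqs_drained(struct gk20a *g, int snapshot)
{
	/* once the handled counter reaches the snapshot, every stalling
	 * interrupt that was pending at snapshot time has been processed */
	return cyclic_delta(snapshot,
			    atomic_read(&g->sw_irq_stall_last_handled)) <= 0;
}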
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f022fe36..2ea5b4be 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -19,12 +19,13 @@
19#define CHANNEL_GK20A_H 19#define CHANNEL_GK20A_H
20 20
21#include <linux/log2.h> 21#include <linux/log2.h>
22#include <linux/slab.h>
23#include <linux/wait.h>
24#include <linux/mutex.h> 22#include <linux/mutex.h>
25#include <uapi/linux/nvgpu.h>
26#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/semaphore.h>
25#include <linux/slab.h>
27#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/wait.h>
28#include <uapi/linux/nvgpu.h>
28 29
29struct gk20a; 30struct gk20a;
30struct gr_gk20a; 31struct gr_gk20a;
@@ -77,8 +78,15 @@ struct channel_gk20a_poll_events {
77 78
78/* this is the priv element of struct nvhost_channel */ 79/* this is the priv element of struct nvhost_channel */
79struct channel_gk20a { 80struct channel_gk20a {
80 struct gk20a *g; 81 struct gk20a *g; /* set only when channel is active */
81 bool in_use; 82
83 struct list_head free_chs;
84
85 spinlock_t ref_obtain_lock;
86 bool referenceable;
87 atomic_t ref_count;
88 wait_queue_head_t ref_count_dec_wq;
89
82 int hw_chid; 90 int hw_chid;
83 bool bound; 91 bool bound;
84 bool first_init; 92 bool first_init;
@@ -171,7 +179,10 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
171} 179}
172int channel_gk20a_commit_va(struct channel_gk20a *c); 180int channel_gk20a_commit_va(struct channel_gk20a *c);
173int gk20a_init_channel_support(struct gk20a *, u32 chid); 181int gk20a_init_channel_support(struct gk20a *, u32 chid);
174void gk20a_free_channel(struct channel_gk20a *ch, bool finish); 182
183/* must be inside gk20a_busy()..gk20a_idle() */
184void gk20a_channel_close(struct channel_gk20a *ch);
185
175bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, 186bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
176 u32 timeout_delta_ms); 187 u32 timeout_delta_ms);
177void gk20a_disable_channel(struct channel_gk20a *ch, 188void gk20a_disable_channel(struct channel_gk20a *ch,
@@ -202,6 +213,15 @@ void gk20a_channel_event(struct channel_gk20a *ch);
202 213
203void gk20a_init_channel(struct gpu_ops *gops); 214void gk20a_init_channel(struct gpu_ops *gops);
204 215
216/* returns ch if reference was obtained */
217struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
218 const char *caller);
219#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
220
221
222void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
223#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
224
205int gk20a_wait_channel_idle(struct channel_gk20a *ch); 225int gk20a_wait_channel_idle(struct channel_gk20a *ch);
206struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g); 226struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g);
207struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, 227struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
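To summarize how the primitives declared above fit together on the close
side, here is a commented sketch of the ordering used by gk20a_free_channel()
(condensed; barriers, locking and the actual waits are elided, and the
function name is for illustration only).

static void example_close_ordering(struct channel_gk20a *ch)
{
	ch->has_timedout = true;	/* 1. block new kickoffs */
	/* 2. wait until only the open-time reference (count == 1) is left */
	/* 3. wait for deferred interrupts that may still reference ch */
	ch->referenceable = false;	/* 4. deny further gk20a_channel_get() */
	atomic_dec(&ch->ref_count);	/* 5. drop the open-time reference */
	/* 6. wait for ref_count == 0, tear down channel state, then: */
	ch->g = NULL;			/* 7. catch any use after close */
}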
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 8cc852c7..7a707fbd 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
154 154
155static void gk20a_channel_syncpt_update(void *priv, int nr_completed) 155static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
156{ 156{
157 struct channel_gk20a *ch20a = priv; 157 struct channel_gk20a *ch = priv;
158 gk20a_channel_update(ch20a, nr_completed); 158 struct gk20a *g = ch->g;
159
160 /* need busy for possible channel deletion */
161 if (gk20a_busy(ch->g->dev)) {
162 gk20a_err(dev_from_gk20a(ch->g),
163 "failed to busy while syncpt update");
164 /* Last gk20a_idle()s are in channel_update, so we shouldn't
165 * get here. If we do, the channel is badly broken now */
166 return;
167 }
168
169 /* note: channel_get() is in __gk20a_channel_syncpt_incr() */
170 gk20a_channel_update(ch, nr_completed);
171 gk20a_channel_put(ch);
172
173 gk20a_idle(g->dev);
159} 174}
160 175
161static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 176static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
@@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
209 thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); 224 thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
210 225
211 if (register_irq) { 226 if (register_irq) {
212 err = nvhost_intr_register_notifier(sp->host1x_pdev, 227 err = gk20a_busy(c->g->dev);
213 sp->id, thresh, 228 if (err)
214 gk20a_channel_syncpt_update, c); 229 gk20a_err(dev_from_gk20a(c->g),
215 230 "failed to add syncpt interrupt notifier for channel %d",
216 /* Adding interrupt action should never fail. A proper error 231 c->hw_chid);
217 * handling here would require us to decrement the syncpt max 232 else {
218 * back to its original value. */ 233 struct channel_gk20a *referenced = gk20a_channel_get(c);
219 WARN(err, "failed to set submit complete interrupt"); 234
235 WARN_ON(!referenced);
236 gk20a_idle(c->g->dev);
237
238 if (referenced) {
239 /* note: channel_put() is in
240 * gk20a_channel_syncpt_update() */
241
242 err = nvhost_intr_register_notifier(
243 sp->host1x_pdev,
244 sp->id, thresh,
245 gk20a_channel_syncpt_update, c);
246 if (err)
247 gk20a_channel_put(referenced);
248
249 /* Adding interrupt action should
250 * never fail. A proper error handling
251 * here would require us to decrement
252 * the syncpt max back to its original
253 * value. */
254 WARN(err,
255 "failed to set submit complete interrupt");
256 }
257 }
220 } 258 }
221 259
222 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, 260 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 0f1c31dd..bda0dab0 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -36,6 +36,7 @@ static struct platform_device *gk20a_device;
36 36
37struct ch_state { 37struct ch_state {
38 int pid; 38 int pid;
39 int refs;
39 u8 inst_block[0]; 40 u8 inst_block[0];
40}; 41};
41 42
@@ -118,9 +119,10 @@ static void gk20a_debug_show_channel(struct gk20a *g,
118 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); 119 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
119 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); 120 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
120 121
121 gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid, 122 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
122 g->dev->name, 123 g->dev->name,
123 ch_state->pid); 124 ch_state->pid,
125 ch_state->refs);
124 gk20a_debug_output(o, "%s in use %s %s\n", 126 gk20a_debug_output(o, "%s in use %s %s\n",
125 ccsr_channel_enable_v(channel) ? "" : "not", 127 ccsr_channel_enable_v(channel) ? "" : "not",
126 ccsr_chan_status_str[status], 128 ccsr_chan_status_str[status],
@@ -231,16 +233,30 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
231 } 233 }
232 234
233 for (chid = 0; chid < f->num_channels; chid++) { 235 for (chid = 0; chid < f->num_channels; chid++) {
234 if (f->channel[chid].in_use) 236 struct channel_gk20a *ch = &f->channel[chid];
235 ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL); 237 if (gk20a_channel_get(ch)) {
238 ch_state[chid] =
239 kmalloc(sizeof(struct ch_state) +
240 ram_in_alloc_size_v(), GFP_KERNEL);
241 /* ref taken stays to below loop with
242 * successful allocs */
243 if (!ch_state[chid])
244 gk20a_channel_put(ch);
245 }
236 } 246 }
237 247
238 for (chid = 0; chid < f->num_channels; chid++) { 248 for (chid = 0; chid < f->num_channels; chid++) {
239 if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) { 249 struct channel_gk20a *ch = &f->channel[chid];
240 ch_state[chid]->pid = f->channel[chid].pid; 250 if (ch_state[chid]) {
241 memcpy(&ch_state[chid]->inst_block[0], 251 if (ch->inst_block.cpu_va) {
242 f->channel[chid].inst_block.cpu_va, 252 ch_state[chid]->pid = ch->pid;
243 ram_in_alloc_size_v()); 253 ch_state[chid]->refs =
254 atomic_read(&ch->ref_count);
255 memcpy(&ch_state[chid]->inst_block[0],
256 ch->inst_block.cpu_va,
257 ram_in_alloc_size_v());
258 }
259 gk20a_channel_put(ch);
244 } 260 }
245 } 261 }
246 for (chid = 0; chid < f->num_channels; chid++) { 262 for (chid = 0; chid < f->num_channels; chid++) {
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 56b954a9..4ef310b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -515,6 +515,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
515 515
516 init_runlist(g, f); 516 init_runlist(g, f);
517 517
518 INIT_LIST_HEAD(&f->free_chs);
519 mutex_init(&f->free_chs_mutex);
520
518 for (chid = 0; chid < f->num_channels; chid++) { 521 for (chid = 0; chid < f->num_channels; chid++) {
519 f->channel[chid].userd_cpu_va = 522 f->channel[chid].userd_cpu_va =
520 f->userd.cpu_va + chid * f->userd_entry_size; 523 f->userd.cpu_va + chid * f->userd_entry_size;
@@ -527,7 +530,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
527 gk20a_init_channel_support(g, chid); 530 gk20a_init_channel_support(g, chid);
528 gk20a_init_tsg_support(g, chid); 531 gk20a_init_tsg_support(g, chid);
529 } 532 }
530 mutex_init(&f->ch_inuse_mutex);
531 mutex_init(&f->tsg_inuse_mutex); 533 mutex_init(&f->tsg_inuse_mutex);
532 534
533 f->remove_support = gk20a_remove_fifo_support; 535 f->remove_support = gk20a_remove_fifo_support;
@@ -637,6 +639,7 @@ int gk20a_init_fifo_support(struct gk20a *g)
637 return err; 639 return err;
638} 640}
639 641
642/* return with a reference to the channel, caller must put it back */
640static struct channel_gk20a * 643static struct channel_gk20a *
641channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) 644channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
642{ 645{
@@ -644,10 +647,16 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
644 if (unlikely(!f->channel)) 647 if (unlikely(!f->channel))
645 return NULL; 648 return NULL;
646 for (ci = 0; ci < f->num_channels; ci++) { 649 for (ci = 0; ci < f->num_channels; ci++) {
647 struct channel_gk20a *c = f->channel+ci; 650 struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]);
648 if (c->inst_block.cpu_va && 651 /* only alive channels are searched */
649 (inst_ptr == gk20a_mem_phys(&c->inst_block))) 652 if (!ch)
650 return f->channel+ci; 653 continue;
654
655 if (ch->inst_block.cpu_va &&
656 (inst_ptr == gk20a_mem_phys(&ch->inst_block)))
657 return ch;
658
659 gk20a_channel_put(ch);
651 } 660 }
652 return NULL; 661 return NULL;
653} 662}
@@ -803,6 +812,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
803 return true; 812 return true;
804} 813}
805 814
815/* caller must hold a channel reference */
806static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, 816static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
807 struct channel_gk20a *ch) 817 struct channel_gk20a *ch)
808{ 818{
@@ -854,14 +864,38 @@ static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
854 "TSG %d generated a mmu fault", tsg->tsgid); 864 "TSG %d generated a mmu fault", tsg->tsgid);
855 865
856 mutex_lock(&tsg->ch_list_lock); 866 mutex_lock(&tsg->ch_list_lock);
857 list_for_each_entry(ch, &tsg->ch_list, ch_entry) 867 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
858 ret = gk20a_fifo_set_ctx_mmu_error(g, ch); 868 if (gk20a_channel_get(ch)) {
869 if (!gk20a_fifo_set_ctx_mmu_error(g, ch))
870 ret = false;
871 gk20a_channel_put(ch);
872 }
873 }
859 mutex_unlock(&tsg->ch_list_lock); 874 mutex_unlock(&tsg->ch_list_lock);
860 875
861 return ret; 876 return ret;
862} 877}
863 878
864static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) 879static void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid)
880{
881 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
882 struct channel_gk20a *ch;
883
884 mutex_lock(&tsg->ch_list_lock);
885 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
886 if (gk20a_channel_get(ch)) {
887 gk20a_channel_abort(ch);
888 gk20a_channel_put(ch);
889 }
890 }
891 mutex_unlock(&tsg->ch_list_lock);
892}
893
894static bool gk20a_fifo_handle_mmu_fault(
895 struct gk20a *g,
896 u32 mmu_fault_engines, /* queried from HW if 0 */
897 u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
898 bool id_is_tsg)
865{ 899{
866 bool fake_fault; 900 bool fake_fault;
867 unsigned long fault_id; 901 unsigned long fault_id;
@@ -894,10 +928,8 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
894 grfifo_ctl | gr_gpfifo_ctl_access_f(0) | 928 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
895 gr_gpfifo_ctl_semaphore_access_f(0)); 929 gr_gpfifo_ctl_semaphore_access_f(0));
896 930
897 /* If we have recovery in progress, MMU fault id is invalid */ 931 if (mmu_fault_engines) {
898 if (g->fifo.mmu_fault_engines) { 932 fault_id = mmu_fault_engines;
899 fault_id = g->fifo.mmu_fault_engines;
900 g->fifo.mmu_fault_engines = 0;
901 fake_fault = true; 933 fake_fault = true;
902 } else { 934 } else {
903 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); 935 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
@@ -914,6 +946,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
914 struct fifo_mmu_fault_info_gk20a f; 946 struct fifo_mmu_fault_info_gk20a f;
915 struct channel_gk20a *ch = NULL; 947 struct channel_gk20a *ch = NULL;
916 struct tsg_gk20a *tsg = NULL; 948 struct tsg_gk20a *tsg = NULL;
949 struct channel_gk20a *referenced_channel = 0;
917 /* read and parse engine status */ 950 /* read and parse engine status */
918 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); 951 u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
919 u32 ctx_status = fifo_engine_status_ctx_status_v(status); 952 u32 ctx_status = fifo_engine_status_ctx_status_v(status);
@@ -953,22 +986,34 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
953 /* get the channel/TSG */ 986 /* get the channel/TSG */
954 if (fake_fault) { 987 if (fake_fault) {
955 /* use next_id if context load is failing */ 988 /* use next_id if context load is failing */
956 u32 id = (ctx_status == 989 u32 id, type;
957 fifo_engine_status_ctx_status_ctxsw_load_v()) ? 990
958 fifo_engine_status_next_id_v(status) : 991 if (hw_id == ~(u32)0) {
959 fifo_engine_status_id_v(status); 992 id = (ctx_status ==
960 u32 type = (ctx_status == 993 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
961 fifo_engine_status_ctx_status_ctxsw_load_v()) ? 994 fifo_engine_status_next_id_v(status) :
962 fifo_engine_status_next_id_type_v(status) : 995 fifo_engine_status_id_v(status);
963 fifo_engine_status_id_type_v(status); 996 type = (ctx_status ==
997 fifo_engine_status_ctx_status_ctxsw_load_v()) ?
998 fifo_engine_status_next_id_type_v(status) :
999 fifo_engine_status_id_type_v(status);
1000 } else {
1001 id = hw_id;
1002 type = id_is_tsg ?
1003 fifo_engine_status_id_type_tsgid_v() :
1004 fifo_engine_status_id_type_chid_v();
1005 }
964 1006
965 if (type == fifo_engine_status_id_type_tsgid_v()) 1007 if (type == fifo_engine_status_id_type_tsgid_v())
966 tsg = &g->fifo.tsg[id]; 1008 tsg = &g->fifo.tsg[id];
967 else if (type == fifo_engine_status_id_type_chid_v()) 1009 else if (type == fifo_engine_status_id_type_chid_v()) {
968 ch = &g->fifo.channel[id]; 1010 ch = &g->fifo.channel[id];
1011 referenced_channel = gk20a_channel_get(ch);
1012 }
969 } else { 1013 } else {
970 /* read channel based on instruction pointer */ 1014 /* read channel based on instruction pointer */
971 ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); 1015 ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
1016 referenced_channel = ch;
972 } 1017 }
973 1018
974 if (ch && gk20a_is_channel_marked_as_tsg(ch)) 1019 if (ch && gk20a_is_channel_marked_as_tsg(ch))
@@ -977,7 +1022,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
977 /* check if engine reset should be deferred */ 1022 /* check if engine reset should be deferred */
978 if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g, 1023 if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g,
979 engine_id, &f, fake_fault)) { 1024 engine_id, &f, fake_fault)) {
980 g->fifo.mmu_fault_engines = fault_id; 1025 g->fifo.deferred_fault_engines = fault_id;
981 1026
982 /* handled during channel free */ 1027 /* handled during channel free */
983 g->fifo.deferred_reset_pending = true; 1028 g->fifo.deferred_reset_pending = true;
@@ -988,19 +1033,31 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
988 * syncpoints */ 1033 * syncpoints */
989 1034
990 if (tsg) { 1035 if (tsg) {
991 struct channel_gk20a *ch = NULL;
992 if (!g->fifo.deferred_reset_pending) 1036 if (!g->fifo.deferred_reset_pending)
993 verbose = 1037 verbose =
994 gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); 1038 gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
995 mutex_lock(&tsg->ch_list_lock); 1039
996 list_for_each_entry(ch, &tsg->ch_list, ch_entry) 1040 gk20a_fifo_abort_tsg(g, ch->tsgid);
997 gk20a_channel_abort(ch); 1041
998 mutex_unlock(&tsg->ch_list_lock); 1042 /* put back the ref taken early above */
1043 if (referenced_channel) {
1044 gk20a_channel_put(ch);
1045 } else {
1046 gk20a_err(dev_from_gk20a(g),
1047 "mmu error in freed tsg channel %d on tsgid %d",
1048 ch->hw_chid, ch->tsgid);
1049 }
999 } else if (ch) { 1050 } else if (ch) {
1000 if (!g->fifo.deferred_reset_pending) 1051 if (referenced_channel) {
1001 verbose = 1052 if (!g->fifo.deferred_reset_pending)
1002 gk20a_fifo_set_ctx_mmu_error_ch(g, ch); 1053 verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1003 gk20a_channel_abort(ch); 1054 gk20a_channel_abort(ch);
1055 gk20a_channel_put(ch);
1056 } else {
1057 gk20a_err(dev_from_gk20a(g),
1058 "mmu error in freed channel %d",
1059 ch->hw_chid);
1060 }
1004 } else if (f.inst_ptr == 1061 } else if (f.inst_ptr ==
1005 gk20a_mem_phys(&g->mm.bar1.inst_block)) { 1062 gk20a_mem_phys(&g->mm.bar1.inst_block)) {
1006 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); 1063 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
@@ -1133,46 +1190,69 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
1133 1190
1134void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) 1191void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
1135{ 1192{
1136 u32 engines = gk20a_fifo_engines_on_id(g, hw_chid, false); 1193 u32 engines;
1194
1195 /* stop context switching to prevent engine assignments from
1196 changing until channel is recovered */
1197 mutex_lock(&g->dbg_sessions_lock);
1198 gr_gk20a_disable_ctxsw(g);
1199
1200 engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
1201
1137 if (engines) 1202 if (engines)
1138 gk20a_fifo_recover(g, engines, verbose); 1203 gk20a_fifo_recover(g, engines, hw_chid, false, verbose);
1139 else { 1204 else {
1140 struct channel_gk20a *ch = 1205 struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
1141 g->fifo.channel + hw_chid;
1142 1206
1143 gk20a_channel_abort(ch); 1207 if (gk20a_channel_get(ch)) {
1208 gk20a_channel_abort(ch);
1144 1209
1145 if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) 1210 if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
1146 gk20a_debug_dump(g->dev); 1211 gk20a_debug_dump(g->dev);
1212
1213 gk20a_channel_put(ch);
1214 }
1147 } 1215 }
1216
1217 gr_gk20a_enable_ctxsw(g);
1218 mutex_unlock(&g->dbg_sessions_lock);
1148} 1219}
1149 1220
1150void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) 1221void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
1151{ 1222{
1152 u32 engines = gk20a_fifo_engines_on_id(g, tsgid, true); 1223 u32 engines;
1224
1225 /* stop context switching to prevent engine assignments from
1226 changing until TSG is recovered */
1227 mutex_lock(&g->dbg_sessions_lock);
1228 gr_gk20a_disable_ctxsw(g);
1229
1230 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
1231
1153 if (engines) 1232 if (engines)
1154 gk20a_fifo_recover(g, engines, verbose); 1233 gk20a_fifo_recover(g, engines, tsgid, true, verbose);
1155 else { 1234 else {
1156 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; 1235 struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
1157 struct channel_gk20a *ch;
1158 1236
1159 if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg)) 1237 if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
1160 gk20a_debug_dump(g->dev); 1238 gk20a_debug_dump(g->dev);
1161 1239
1162 mutex_lock(&tsg->ch_list_lock); 1240 gk20a_fifo_abort_tsg(g, tsgid);
1163 list_for_each_entry(ch, &tsg->ch_list, ch_entry)
1164 gk20a_channel_abort(ch);
1165 mutex_unlock(&tsg->ch_list_lock);
1166 } 1241 }
1242
1243 gr_gk20a_enable_ctxsw(g);
1244 mutex_unlock(&g->dbg_sessions_lock);
1167} 1245}
1168 1246
1169void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, 1247void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1170 bool verbose) 1248 u32 hw_id, bool id_is_tsg,
1249 bool verbose)
1171{ 1250{
1172 unsigned long engine_id, i; 1251 unsigned long engine_id, i;
1173 unsigned long _engine_ids = __engine_ids; 1252 unsigned long _engine_ids = __engine_ids;
1174 unsigned long engine_ids = 0; 1253 unsigned long engine_ids = 0;
1175 u32 val; 1254 u32 val;
1255 u32 mmu_fault_engines = 0;
1176 1256
1177 if (verbose) 1257 if (verbose)
1178 gk20a_debug_dump(g->dev); 1258 gk20a_debug_dump(g->dev);
@@ -1181,7 +1261,6 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1181 g->ops.ltc.flush(g); 1261 g->ops.ltc.flush(g);
1182 1262
1183 /* store faulted engines in advance */ 1263 /* store faulted engines in advance */
1184 g->fifo.mmu_fault_engines = 0;
1185 for_each_set_bit(engine_id, &_engine_ids, 32) { 1264 for_each_set_bit(engine_id, &_engine_ids, 32) {
1186 u32 ref_type; 1265 u32 ref_type;
1187 u32 ref_id; 1266 u32 ref_id;
@@ -1196,11 +1275,10 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1196 gk20a_fifo_get_faulty_id_type(g, i, &id, &type); 1275 gk20a_fifo_get_faulty_id_type(g, i, &id, &type);
1197 if (ref_type == type && ref_id == id) { 1276 if (ref_type == type && ref_id == id) {
1198 engine_ids |= BIT(i); 1277 engine_ids |= BIT(i);
1199 g->fifo.mmu_fault_engines |= 1278 mmu_fault_engines |=
1200 BIT(gk20a_engine_id_to_mmu_id(i)); 1279 BIT(gk20a_engine_id_to_mmu_id(i));
1201 } 1280 }
1202 } 1281 }
1203
1204 } 1282 }
1205 1283
1206 /* 1284 /*
@@ -1214,7 +1292,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1214 fifo_intr_0_sched_error_reset_f()); 1292 fifo_intr_0_sched_error_reset_f());
1215 1293
1216 g->ops.fifo.trigger_mmu_fault(g, engine_ids); 1294 g->ops.fifo.trigger_mmu_fault(g, engine_ids);
1217 gk20a_fifo_handle_mmu_fault(g); 1295 gk20a_fifo_handle_mmu_fault(g, engine_ids, hw_id, id_is_tsg);
1218 1296
1219 val = gk20a_readl(g, fifo_intr_en_0_r()); 1297 val = gk20a_readl(g, fifo_intr_en_0_r());
1220 val |= fifo_intr_en_0_mmu_fault_f(1) 1298 val |= fifo_intr_en_0_mmu_fault_f(1)
@@ -1222,25 +1300,32 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
1222 gk20a_writel(g, fifo_intr_en_0_r(), val); 1300 gk20a_writel(g, fifo_intr_en_0_r(), val);
1223} 1301}
1224 1302
1303/* force reset channel and tsg (if it's part of one) */
1225int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) 1304int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose)
1226{ 1305{
1227 struct tsg_gk20a *tsg = NULL; 1306 struct tsg_gk20a *tsg = NULL;
1228 struct channel_gk20a *ch_tsg = NULL; 1307 struct channel_gk20a *ch_tsg = NULL;
1308 struct gk20a *g = ch->g;
1229 1309
1230 if (gk20a_is_channel_marked_as_tsg(ch)) { 1310 if (gk20a_is_channel_marked_as_tsg(ch)) {
1231 tsg = &ch->g->fifo.tsg[ch->hw_chid]; 1311 tsg = &g->fifo.tsg[ch->hw_chid];
1232 1312
1233 mutex_lock(&tsg->ch_list_lock); 1313 mutex_lock(&tsg->ch_list_lock);
1314
1234 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { 1315 list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) {
1235 gk20a_set_error_notifier(ch_tsg, 1316 if (gk20a_channel_get(ch_tsg)) {
1236 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); 1317 gk20a_set_error_notifier(ch_tsg,
1318 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR);
1319 gk20a_channel_put(ch_tsg);
1320 }
1237 } 1321 }
1322
1238 mutex_unlock(&tsg->ch_list_lock); 1323 mutex_unlock(&tsg->ch_list_lock);
1239 gk20a_fifo_recover_tsg(ch->g, ch->tsgid, verbose); 1324 gk20a_fifo_recover_tsg(g, ch->tsgid, verbose);
1240 } else { 1325 } else {
1241 gk20a_set_error_notifier(ch, 1326 gk20a_set_error_notifier(ch,
1242 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); 1327 NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR);
1243 gk20a_fifo_recover_ch(ch->g, ch->hw_chid, verbose); 1328 gk20a_fifo_recover_ch(g, ch->hw_chid, verbose);
1244 } 1329 }
1245 1330
1246 return 0; 1331 return 0;
@@ -1300,11 +1385,14 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1300 struct channel_gk20a *ch = &f->channel[id]; 1385 struct channel_gk20a *ch = &f->channel[id];
1301 1386
1302 if (non_chid) { 1387 if (non_chid) {
1303 gk20a_fifo_recover(g, BIT(engine_id), true); 1388 gk20a_fifo_recover(g, BIT(engine_id), id, true, true);
1304 ret = true; 1389 ret = true;
1305 goto err; 1390 goto err;
1306 } 1391 }
1307 1392
1393 if (!gk20a_channel_get(ch))
1394 goto err;
1395
1308 if (gk20a_channel_update_and_check_timeout(ch, 1396 if (gk20a_channel_update_and_check_timeout(ch,
1309 GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { 1397 GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) {
1310 gk20a_set_error_notifier(ch, 1398 gk20a_set_error_notifier(ch,
@@ -1313,7 +1401,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1313 "fifo sched ctxsw timeout error:" 1401 "fifo sched ctxsw timeout error:"
1314 "engine = %u, ch = %d", engine_id, id); 1402 "engine = %u, ch = %d", engine_id, id);
1315 gk20a_gr_debug_dump(g->dev); 1403 gk20a_gr_debug_dump(g->dev);
1316 gk20a_fifo_recover(g, BIT(engine_id), 1404 gk20a_fifo_recover(g, BIT(engine_id), id, false,
1317 ch->timeout_debug_dump); 1405 ch->timeout_debug_dump);
1318 ret = true; 1406 ret = true;
1319 } else { 1407 } else {
@@ -1324,6 +1412,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
1324 id); 1412 id);
1325 ret = false; 1413 ret = false;
1326 } 1414 }
1415 gk20a_channel_put(ch);
1327 return ret; 1416 return ret;
1328 } 1417 }
1329 1418
@@ -1336,7 +1425,7 @@ err:
1336 1425
1337static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) 1426static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
1338{ 1427{
1339 bool print_channel_reset_log = false, reset_engine = false; 1428 bool print_channel_reset_log = false;
1340 struct device *dev = dev_from_gk20a(g); 1429 struct device *dev = dev_from_gk20a(g);
1341 u32 handled = 0; 1430 u32 handled = 0;
1342 1431
@@ -1367,8 +1456,8 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
1367 } 1456 }
1368 1457
1369 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { 1458 if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) {
1370 print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g); 1459 print_channel_reset_log =
1371 reset_engine = true; 1460 gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false);
1372 handled |= fifo_intr_0_mmu_fault_pending_f(); 1461 handled |= fifo_intr_0_mmu_fault_pending_f();
1373 } 1462 }
1374 1463
@@ -1452,9 +1541,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
1452 == fifo_pbdma_status_id_type_chid_v()) { 1541 == fifo_pbdma_status_id_type_chid_v()) {
1453 struct channel_gk20a *ch = &f->channel[id]; 1542 struct channel_gk20a *ch = &f->channel[id];
1454 1543
1455 gk20a_set_error_notifier(ch, 1544 if (gk20a_channel_get(ch)) {
1456 NVGPU_CHANNEL_PBDMA_ERROR); 1545 gk20a_set_error_notifier(ch,
1457 gk20a_fifo_recover_ch(g, id, true); 1546 NVGPU_CHANNEL_PBDMA_ERROR);
1547 gk20a_fifo_recover_ch(g, id, true);
1548 gk20a_channel_put(ch);
1549 }
1458 } else if (fifo_pbdma_status_id_type_v(status) 1550 } else if (fifo_pbdma_status_id_type_v(status)
1459 == fifo_pbdma_status_id_type_tsgid_v()) { 1551 == fifo_pbdma_status_id_type_tsgid_v()) {
1460 struct tsg_gk20a *tsg = &f->tsg[id]; 1552 struct tsg_gk20a *tsg = &f->tsg[id];
@@ -1462,8 +1554,11 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
1462 1554
1463 mutex_lock(&tsg->ch_list_lock); 1555 mutex_lock(&tsg->ch_list_lock);
1464 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 1556 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
1465 gk20a_set_error_notifier(ch, 1557 if (gk20a_channel_get(ch)) {
1466 NVGPU_CHANNEL_PBDMA_ERROR); 1558 gk20a_set_error_notifier(ch,
1559 NVGPU_CHANNEL_PBDMA_ERROR);
1560 gk20a_channel_put(ch);
1561 }
1467 } 1562 }
1468 mutex_unlock(&tsg->ch_list_lock); 1563 mutex_unlock(&tsg->ch_list_lock);
1469 gk20a_fifo_recover_tsg(g, id, true); 1564 gk20a_fifo_recover_tsg(g, id, true);
@@ -1559,6 +1654,8 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1559 + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); 1654 + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
1560 u32 ret = 0; 1655 u32 ret = 0;
1561 1656
1657 gk20a_dbg_fn("%d", id);
1658
1562 /* issue preempt */ 1659 /* issue preempt */
1563 if (is_tsg) 1660 if (is_tsg)
1564 gk20a_writel(g, fifo_preempt_r(), 1661 gk20a_writel(g, fifo_preempt_r(),
@@ -1569,6 +1666,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1569 fifo_preempt_chid_f(id) | 1666 fifo_preempt_chid_f(id) |
1570 fifo_preempt_type_channel_f()); 1667 fifo_preempt_type_channel_f());
1571 1668
1669 gk20a_dbg_fn("%d", id);
1572 /* wait for preempt */ 1670 /* wait for preempt */
1573 ret = -EBUSY; 1671 ret = -EBUSY;
1574 do { 1672 do {
@@ -1583,6 +1681,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1583 } while (time_before(jiffies, end_jiffies) || 1681 } while (time_before(jiffies, end_jiffies) ||
1584 !tegra_platform_is_silicon()); 1682 !tegra_platform_is_silicon());
1585 1683
1684 gk20a_dbg_fn("%d", id);
1586 if (ret) { 1685 if (ret) {
1587 if (is_tsg) { 1686 if (is_tsg) {
1588 struct tsg_gk20a *tsg = &g->fifo.tsg[id]; 1687 struct tsg_gk20a *tsg = &g->fifo.tsg[id];
@@ -1593,8 +1692,11 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1593 1692
1594 mutex_lock(&tsg->ch_list_lock); 1693 mutex_lock(&tsg->ch_list_lock);
1595 list_for_each_entry(ch, &tsg->ch_list, ch_entry) { 1694 list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
1695 if (!gk20a_channel_get(ch))
1696 continue;
1596 gk20a_set_error_notifier(ch, 1697 gk20a_set_error_notifier(ch,
1597 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 1698 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1699 gk20a_channel_put(ch);
1598 } 1700 }
1599 mutex_unlock(&tsg->ch_list_lock); 1701 mutex_unlock(&tsg->ch_list_lock);
1600 gk20a_fifo_recover_tsg(g, id, true); 1702 gk20a_fifo_recover_tsg(g, id, true);
@@ -1604,9 +1706,12 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
1604 gk20a_err(dev_from_gk20a(g), 1706 gk20a_err(dev_from_gk20a(g),
1605 "preempt channel %d timeout\n", id); 1707 "preempt channel %d timeout\n", id);
1606 1708
1607 gk20a_set_error_notifier(ch, 1709 if (gk20a_channel_get(ch)) {
1608 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); 1710 gk20a_set_error_notifier(ch,
1609 gk20a_fifo_recover_ch(g, id, true); 1711 NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
1712 gk20a_fifo_recover_ch(g, id, true);
1713 gk20a_channel_put(ch);
1714 }
1610 } 1715 }
1611 } 1716 }
1612 1717
@@ -1790,7 +1895,9 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id)
1790 (f->engine_info[i].runlist_id == runlist_id)) 1895 (f->engine_info[i].runlist_id == runlist_id))
1791 engines |= BIT(i); 1896 engines |= BIT(i);
1792 } 1897 }
1793 gk20a_fifo_recover(g, engines, true); 1898
1899 if (engines)
1900 gk20a_fifo_recover(g, engines, ~(u32)0, false, true);
1794} 1901}
1795 1902
1796static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) 1903static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
@@ -1994,6 +2101,8 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid,
1994 u32 mutex_ret; 2101 u32 mutex_ret;
1995 u32 ret = 0; 2102 u32 ret = 0;
1996 2103
2104 gk20a_dbg_fn("");
2105
1997 runlist = &f->runlist_info[runlist_id]; 2106 runlist = &f->runlist_info[runlist_id];
1998 2107
1999 mutex_lock(&runlist->mutex); 2108 mutex_lock(&runlist->mutex);
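The PBDMA and preempt-timeout hunks above all wrap the error notifier and the recovery call in the same reference guard. Condensed from the hunks themselves, the pattern is:

	struct channel_gk20a *ch = &f->channel[id];

	/* take a reference; this fails if the channel is not open or is closing */
	if (gk20a_channel_get(ch)) {
		gk20a_set_error_notifier(ch, NVGPU_CHANNEL_PBDMA_ERROR);
		gk20a_fifo_recover_ch(g, id, true);
		gk20a_channel_put(ch);	/* drop the reference when done */
	}
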
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index dd320ae1..fdf843d2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A graphics fifo (gr host) 4 * GK20A graphics fifo (gr host)
5 * 5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -106,7 +106,9 @@ struct fifo_gk20a {
106 u32 userd_entry_size; 106 u32 userd_entry_size;
107 107
108 struct channel_gk20a *channel; 108 struct channel_gk20a *channel;
109 struct mutex ch_inuse_mutex; /* protect unused chid look up */ 109 /* zero-kref'd channels here */
110 struct list_head free_chs;
111 struct mutex free_chs_mutex;
110 112
111 struct tsg_gk20a *tsg; 113 struct tsg_gk20a *tsg;
112 struct mutex tsg_inuse_mutex; 114 struct mutex tsg_inuse_mutex;
@@ -130,7 +132,7 @@ struct fifo_gk20a {
130 132
131 } intr; 133 } intr;
132 134
133 u32 mmu_fault_engines; 135 u32 deferred_fault_engines;
134 bool deferred_reset_pending; 136 bool deferred_reset_pending;
135 struct mutex deferred_reset_mutex; 137 struct mutex deferred_reset_mutex;
136}; 138};
@@ -157,7 +159,12 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
157int gk20a_fifo_suspend(struct gk20a *g); 159int gk20a_fifo_suspend(struct gk20a *g);
158 160
159bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); 161bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
160void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); 162
163void gk20a_fifo_recover(struct gk20a *g,
164 u32 engine_ids, /* if zero, will be queried from HW */
165 u32 hw_id, /* if ~0, will be queried from HW */
166 bool hw_id_is_tsg, /* ignored if hw_id == ~0 */
167 bool verbose);
161void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); 168void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose);
162void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); 169void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose);
163int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); 170int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose);
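With the widened gk20a_fifo_recover() prototype, a caller can either name the failing hw id up front or leave both lookups to the recovery code. A minimal sketch of the two forms, assuming engine_ids and chid are values the caller already holds:

	/* engines known, hw id unknown: let recovery query the hardware */
	gk20a_fifo_recover(g, engine_ids, ~(u32)0, false, true);

	/* engines and channel id both known; false marks chid as a channel, not a TSG */
	gk20a_fifo_recover(g, engine_ids, chid, false, true);
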
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9c201f32..498de7e7 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1388,6 +1388,9 @@ static int gk20a_probe(struct platform_device *dev)
1388 return -ENOMEM; 1388 return -ENOMEM;
1389 } 1389 }
1390 1390
1391 init_waitqueue_head(&gk20a->sw_irq_stall_last_handled_wq);
1392 init_waitqueue_head(&gk20a->sw_irq_nonstall_last_handled_wq);
1393
1391#ifdef CONFIG_PM_GENERIC_DOMAINS_OF 1394#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
1392 gk20a_domain = container_of(dev_to_genpd(&dev->dev), 1395 gk20a_domain = container_of(dev_to_genpd(&dev->dev),
1393 struct gk20a_domain_data, gpd); 1396 struct gk20a_domain_data, gpd);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a52d97f3..d8e3586f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -538,6 +538,15 @@ struct gk20a {
538 u32 max_ltc_count; 538 u32 max_ltc_count;
539 u32 ltc_count; 539 u32 ltc_count;
540 540
541 atomic_t hw_irq_stall_count;
542 atomic_t hw_irq_nonstall_count;
543
544 atomic_t sw_irq_stall_last_handled;
545 wait_queue_head_t sw_irq_stall_last_handled_wq;
546
547 atomic_t sw_irq_nonstall_last_handled;
548 wait_queue_head_t sw_irq_nonstall_last_handled_wq;
549
541 struct devfreq *devfreq; 550 struct devfreq *devfreq;
542 551
543 struct gk20a_scale_profile *scale_profile; 552 struct gk20a_scale_profile *scale_profile;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index b2fea5b8..edd4c6c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g,
5138 * Also used by regops to translate current ctx to chid and tsgid. 5138 * Also used by regops to translate current ctx to chid and tsgid.
5139 * For performance, we don't want to go through 128 channels every time. 5139 * For performance, we don't want to go through 128 channels every time.
5140 * curr_ctx should be the value read from gr_fecs_current_ctx_r(). 5140 * curr_ctx should be the value read from gr_fecs_current_ctx_r().
5141 * A small tlb is used here to cache translation */ 5141 * A small tlb is used here to cache translation.
5142static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, 5142 *
5143 int *curr_tsgid) 5143 * Returned channel must be freed with gk20a_channel_put() */
5144static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
5145 struct gk20a *g, u32 curr_ctx, int *curr_tsgid)
5144{ 5146{
5145 struct fifo_gk20a *f = &g->fifo; 5147 struct fifo_gk20a *f = &g->fifo;
5146 struct gr_gk20a *gr = &g->gr; 5148 struct gr_gk20a *gr = &g->gr;
5147 u32 chid = -1; 5149 u32 chid = -1;
5148 int tsgid = NVGPU_INVALID_TSG_ID; 5150 int tsgid = NVGPU_INVALID_TSG_ID;
5149 u32 i; 5151 u32 i;
5152 struct channel_gk20a *ret = NULL;
5150 5153
5151 /* when contexts are unloaded from GR, the valid bit is reset 5154 /* when contexts are unloaded from GR, the valid bit is reset
5152 * but the instance pointer information remains intact. So the 5155 * but the instance pointer information remains intact. So the
5153 * valid bit must be checked to be absolutely certain that a 5156 * valid bit must be checked to be absolutely certain that a
5154 * valid context is currently resident. */ 5157 * valid context is currently resident. */
5155 if (!gr_fecs_current_ctx_valid_v(curr_ctx)) 5158 if (!gr_fecs_current_ctx_valid_v(curr_ctx))
5156 return -1; 5159 return NULL;
5157 5160
5158 spin_lock(&gr->ch_tlb_lock); 5161 spin_lock(&gr->ch_tlb_lock);
5159 5162
@@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
5162 if (gr->chid_tlb[i].curr_ctx == curr_ctx) { 5165 if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
5163 chid = gr->chid_tlb[i].hw_chid; 5166 chid = gr->chid_tlb[i].hw_chid;
5164 tsgid = gr->chid_tlb[i].tsgid; 5167 tsgid = gr->chid_tlb[i].tsgid;
5168 ret = gk20a_channel_get(&f->channel[chid]);
5165 goto unlock; 5169 goto unlock;
5166 } 5170 }
5167 } 5171 }
5168 5172
5169 /* slow path */ 5173 /* slow path */
5170 for (chid = 0; chid < f->num_channels; chid++) 5174 for (chid = 0; chid < f->num_channels; chid++) {
5171 if (f->channel[chid].in_use) { 5175 struct channel_gk20a *ch = &f->channel[chid];
5172 if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> 5176 if (!gk20a_channel_get(ch))
5173 ram_in_base_shift_v()) == 5177 continue;
5178
5179 if ((u32)(gk20a_mem_phys(&ch->inst_block) >>
5180 ram_in_base_shift_v()) ==
5174 gr_fecs_current_ctx_ptr_v(curr_ctx)) { 5181 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5175 tsgid = f->channel[chid].tsgid; 5182 tsgid = ch->tsgid;
5176 break; 5183 /* found it */
5177 } 5184 ret = ch;
5185 break;
5186 }
5187 gk20a_channel_put(ch);
5178 } 5188 }
5179 5189
5180 if (chid >= f->num_channels) { 5190 if (!ret)
5181 chid = -1;
5182 goto unlock; 5191 goto unlock;
5183 }
5184 5192
5185 /* add to free tlb entry */ 5193 /* add to free tlb entry */
5186 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { 5194 for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
@@ -5205,7 +5213,7 @@ unlock:
5205 spin_unlock(&gr->ch_tlb_lock); 5213 spin_unlock(&gr->ch_tlb_lock);
5206 if (curr_tsgid) 5214 if (curr_tsgid)
5207 *curr_tsgid = tsgid; 5215 *curr_tsgid = tsgid;
5208 return chid; 5216 return ret;
5209} 5217}
5210 5218
5211int gk20a_gr_lock_down_sm(struct gk20a *g, 5219int gk20a_gr_lock_down_sm(struct gk20a *g,
@@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g)
5399 u32 obj_table; 5407 u32 obj_table;
5400 int need_reset = 0; 5408 int need_reset = 0;
5401 u32 gr_intr = gk20a_readl(g, gr_intr_r()); 5409 u32 gr_intr = gk20a_readl(g, gr_intr_r());
5410 struct channel_gk20a *ch = NULL;
5402 5411
5403 gk20a_dbg_fn(""); 5412 gk20a_dbg_fn("");
5404 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); 5413 gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr);
@@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g)
5424 gr_fe_object_table_r(isr_data.sub_chan)) : 0; 5433 gr_fe_object_table_r(isr_data.sub_chan)) : 0;
5425 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); 5434 isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
5426 5435
5427 isr_data.chid = 5436 ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL);
5428 gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL); 5437 if (!ch) {
5429 if (isr_data.chid == -1) {
5430 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", 5438 gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
5431 isr_data.curr_ctx); 5439 isr_data.curr_ctx);
5432 goto clean_up; 5440 goto clean_up;
5433 } 5441 }
5442 isr_data.chid = ch->hw_chid;
5434 5443
5435 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 5444 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5436 "channel %d: addr 0x%08x, " 5445 "channel %d: addr 0x%08x, "
@@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g)
5512 5521
5513 if (gr_intr & gr_intr_exception_pending_f()) { 5522 if (gr_intr & gr_intr_exception_pending_f()) {
5514 u32 exception = gk20a_readl(g, gr_exception_r()); 5523 u32 exception = gk20a_readl(g, gr_exception_r());
5515 struct fifo_gk20a *f = &g->fifo;
5516 struct channel_gk20a *ch = &f->channel[isr_data.chid];
5517 5524
5518 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); 5525 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception);
5519 5526
@@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g)
5572 } 5579 }
5573 5580
5574 if (need_reset) 5581 if (need_reset)
5575 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true); 5582 gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A),
5583 ~(u32)0, false, true);
5576 5584
5577clean_up: 5585clean_up:
5586 if (gr_intr && !ch) {
5587 /* Clear interrupts for unused channel. This is
5588 probably an interrupt during gk20a_free_channel() */
5589 gk20a_err(dev_from_gk20a(g),
5590 "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing",
5591 gr_intr);
5592 gk20a_writel(g, gr_intr_r(), gr_intr);
5593 gr_intr = 0;
5594 }
5595
5578 gk20a_writel(g, gr_gpfifo_ctl_r(), 5596 gk20a_writel(g, gr_gpfifo_ctl_r(),
5579 grfifo_ctl | gr_gpfifo_ctl_access_f(1) | 5597 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
5580 gr_gpfifo_ctl_semaphore_access_f(1)); 5598 gr_gpfifo_ctl_semaphore_access_f(1));
@@ -5583,6 +5601,9 @@ clean_up:
5583 gk20a_err(dev_from_gk20a(g), 5601 gk20a_err(dev_from_gk20a(g),
5584 "unhandled gr interrupt 0x%08x", gr_intr); 5602 "unhandled gr interrupt 0x%08x", gr_intr);
5585 5603
5604 if (ch)
5605 gk20a_channel_put(ch);
5606
5586 return 0; 5607 return 0;
5587} 5608}
5588 5609
@@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
6670 6691
6671bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) 6692bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
6672{ 6693{
6673 int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid; 6694 int curr_gr_ctx, curr_gr_tsgid;
6674 struct gk20a *g = ch->g; 6695 struct gk20a *g = ch->g;
6696 struct channel_gk20a *curr_ch;
6697 bool ret = false;
6675 6698
6676 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); 6699 curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
6677 curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx, 6700 curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx,
6678 &curr_gr_tsgid); 6701 &curr_gr_tsgid);
6679 6702
6680 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, 6703 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
6681 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" 6704 "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
6682 " ch->hw_chid=%d", curr_gr_chid, 6705 " ch->hw_chid=%d",
6683 curr_gr_tsgid, ch->tsgid, ch->hw_chid); 6706 curr_ch ? curr_ch->hw_chid : -1,
6684 6707 curr_gr_tsgid,
6685 if (curr_gr_chid == -1) 6708 ch->tsgid,
6709 ch->hw_chid);
6710
6711 if (!curr_ch)
6686 return false; 6712 return false;
6687 6713
6688 if (ch->hw_chid == curr_gr_chid) 6714 if (ch->hw_chid == curr_ch->hw_chid)
6689 return true; 6715 ret = true;
6690 6716
6691 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) 6717 if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid))
6692 return true; 6718 ret = true;
6693 6719
6694 return false; 6720 gk20a_channel_put(curr_ch);
6721 return ret;
6695} 6722}
6696 6723
6697int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, 6724int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
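Both call sites above follow the contract in the new comment: a non-NULL return from gk20a_gr_get_channel_from_ctx() carries a reference that the caller must drop. A stripped-down sketch of a caller, assuming only that curr_ctx is read from gr_fecs_current_ctx_r():

	u32 curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
	struct channel_gk20a *ch =
		gk20a_gr_get_channel_from_ctx(g, curr_ctx, NULL);

	if (!ch)
		return;		/* no referenceable channel is currently resident */

	/* ... use ch->hw_chid, ch->tsgid, etc. ... */

	gk20a_channel_put(ch);	/* release the reference taken by the lookup */
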
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
index 06b00a25..0a773d10 100644
--- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c
@@ -40,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g)
40 /* flush previous write */ 40 /* flush previous write */
41 gk20a_readl(g, mc_intr_en_0_r()); 41 gk20a_readl(g, mc_intr_en_0_r());
42 42
43 atomic_inc(&g->hw_irq_stall_count);
44
43 trace_mc_gk20a_intr_stall_done(g->dev->name); 45 trace_mc_gk20a_intr_stall_done(g->dev->name);
44 46
45 return IRQ_WAKE_THREAD; 47 return IRQ_WAKE_THREAD;
@@ -63,18 +65,22 @@ irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g)
63 /* flush previous write */ 65 /* flush previous write */
64 gk20a_readl(g, mc_intr_en_1_r()); 66 gk20a_readl(g, mc_intr_en_1_r());
65 67
68 atomic_inc(&g->hw_irq_nonstall_count);
69
66 return IRQ_WAKE_THREAD; 70 return IRQ_WAKE_THREAD;
67} 71}
68 72
69irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) 73irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
70{ 74{
71 u32 mc_intr_0; 75 u32 mc_intr_0;
76 int hw_irq_count;
72 77
73 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); 78 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
74 79
75 trace_mc_gk20a_intr_thread_stall(g->dev->name); 80 trace_mc_gk20a_intr_thread_stall(g->dev->name);
76 81
77 mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); 82 mc_intr_0 = gk20a_readl(g, mc_intr_0_r());
83 hw_irq_count = atomic_read(&g->hw_irq_stall_count);
78 84
79 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); 85 gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0);
80 86
@@ -94,12 +100,17 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
94 if (mc_intr_0 & mc_intr_0_pbus_pending_f()) 100 if (mc_intr_0 & mc_intr_0_pbus_pending_f())
95 gk20a_pbus_isr(g); 101 gk20a_pbus_isr(g);
96 102
103 /* sync handled irq counter before re-enabling interrupts */
104 atomic_set(&g->sw_irq_stall_last_handled, hw_irq_count);
105
97 gk20a_writel(g, mc_intr_en_0_r(), 106 gk20a_writel(g, mc_intr_en_0_r(),
98 mc_intr_en_0_inta_hardware_f()); 107 mc_intr_en_0_inta_hardware_f());
99 108
100 /* flush previous write */ 109 /* flush previous write */
101 gk20a_readl(g, mc_intr_en_0_r()); 110 gk20a_readl(g, mc_intr_en_0_r());
102 111
112 wake_up_all(&g->sw_irq_stall_last_handled_wq);
113
103 trace_mc_gk20a_intr_thread_stall_done(g->dev->name); 114 trace_mc_gk20a_intr_thread_stall_done(g->dev->name);
104 115
105 return IRQ_HANDLED; 116 return IRQ_HANDLED;
@@ -108,10 +119,12 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g)
108irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) 119irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
109{ 120{
110 u32 mc_intr_1; 121 u32 mc_intr_1;
122 int hw_irq_count;
111 123
112 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); 124 gk20a_dbg(gpu_dbg_intr, "interrupt thread launched");
113 125
114 mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); 126 mc_intr_1 = gk20a_readl(g, mc_intr_1_r());
127 hw_irq_count = atomic_read(&g->hw_irq_nonstall_count);
115 128
116 gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); 129 gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1);
117 130
@@ -125,12 +138,17 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g)
125 && g->ops.ce2.isr_nonstall) 138 && g->ops.ce2.isr_nonstall)
126 g->ops.ce2.isr_nonstall(g); 139 g->ops.ce2.isr_nonstall(g);
127 140
141 /* sync handled irq counter before re-enabling interrupts */
142 atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count);
143
128 gk20a_writel(g, mc_intr_en_1_r(), 144 gk20a_writel(g, mc_intr_en_1_r(),
129 mc_intr_en_1_inta_hardware_f()); 145 mc_intr_en_1_inta_hardware_f());
130 146
131 /* flush previous write */ 147 /* flush previous write */
132 gk20a_readl(g, mc_intr_en_1_r()); 148 gk20a_readl(g, mc_intr_en_1_r());
133 149
150 wake_up_all(&g->sw_irq_stall_last_handled_wq);
151
134 return IRQ_HANDLED; 152 return IRQ_HANDLED;
135} 153}
136 154
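The counters added here form a simple handshake: the hard IRQ handler bumps hw_irq_stall_count, and the threaded handler publishes the snapshot it took into sw_irq_stall_last_handled and wakes the wait queue only after every sub-handler has run. Code that needs to be sure the deferred handler has finished (the channel-free path, which lives outside this file) can then wait roughly like this; the function name is made up for illustration:

	/* illustrative only; the real wait lives elsewhere in the driver */
	static void example_wait_for_deferred_stall_irq(struct gk20a *g)
	{
		/* snapshot how many stalling interrupts the hard IRQ handler has seen */
		int irq_count = atomic_read(&g->hw_irq_stall_count);

		/* sleep until the threaded handler reports it has caught up */
		wait_event(g->sw_irq_stall_last_handled_wq,
			   atomic_read(&g->sw_irq_stall_last_handled) >= irq_count);
	}
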
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 68a31eca..23ff8677 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -283,6 +283,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
283 283
284 init_runlist(g, f); 284 init_runlist(g, f);
285 285
286 INIT_LIST_HEAD(&f->free_chs);
287 mutex_init(&f->free_chs_mutex);
288
286 for (chid = 0; chid < f->num_channels; chid++) { 289 for (chid = 0; chid < f->num_channels; chid++) {
287 f->channel[chid].userd_cpu_va = 290 f->channel[chid].userd_cpu_va =
288 f->userd.cpu_va + chid * f->userd_entry_size; 291 f->userd.cpu_va + chid * f->userd_entry_size;
@@ -294,7 +297,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
294 297
295 gk20a_init_channel_support(g, chid); 298 gk20a_init_channel_support(g, chid);
296 } 299 }
297 mutex_init(&f->ch_inuse_mutex);
298 300
299 f->deferred_reset_pending = false; 301 f->deferred_reset_pending = false;
300 mutex_init(&f->deferred_reset_mutex); 302 mutex_init(&f->deferred_reset_mutex);
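The vgpu setup now mirrors the bare-metal path: instead of scanning for an in_use flag under ch_inuse_mutex, closed channels are kept on f->free_chs, and channel allocation becomes a list pop under free_chs_mutex. A rough sketch of that allocation step, assuming the channel struct carries a list node also named free_chs (the actual allocator is in channel_gk20a.c and is not shown in this part of the diff):

	static struct channel_gk20a *example_get_free_channel(struct fifo_gk20a *f)
	{
		struct channel_gk20a *ch = NULL;

		mutex_lock(&f->free_chs_mutex);
		if (!list_empty(&f->free_chs)) {
			ch = list_first_entry(&f->free_chs,
					      struct channel_gk20a, free_chs);
			list_del(&ch->free_chs);
		}
		mutex_unlock(&f->free_chs_mutex);

		return ch;
	}
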
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index ad738f43..461ff6e8 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -140,12 +140,54 @@ DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done,
140 TP_ARGS(name) 140 TP_ARGS(name)
141); 141);
142 142
143TRACE_EVENT(gk20a_channel_update, 143DECLARE_EVENT_CLASS(gk20a_channel,
144 TP_PROTO(const void *channel), 144 TP_PROTO(int channel),
145 TP_ARGS(channel), 145 TP_ARGS(channel),
146 TP_STRUCT__entry(__field(const void *, channel)), 146 TP_STRUCT__entry(__field(int, channel)),
147 TP_fast_assign(__entry->channel = channel;), 147 TP_fast_assign(__entry->channel = channel;),
148 TP_printk("channel=%p", __entry->channel) 148 TP_printk("ch id %d", __entry->channel)
149);
150DEFINE_EVENT(gk20a_channel, gk20a_channel_update,
151 TP_PROTO(int channel),
152 TP_ARGS(channel)
153);
154DEFINE_EVENT(gk20a_channel, gk20a_free_channel,
155 TP_PROTO(int channel),
156 TP_ARGS(channel)
157);
158DEFINE_EVENT(gk20a_channel, gk20a_open_new_channel,
159 TP_PROTO(int channel),
160 TP_ARGS(channel)
161);
162DEFINE_EVENT(gk20a_channel, gk20a_release_used_channel,
163 TP_PROTO(int channel),
164 TP_ARGS(channel)
165);
166
167DECLARE_EVENT_CLASS(gk20a_channel_getput,
168 TP_PROTO(int channel, const char *caller),
169 TP_ARGS(channel, caller),
170 TP_STRUCT__entry(
171 __field(int, channel)
172 __field(const char *, caller)
173 ),
174 TP_fast_assign(
175 __entry->channel = channel;
176 __entry->caller = caller;
177 ),
178 TP_printk("channel %d caller %s", __entry->channel, __entry->caller)
179);
180DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_get,
181 TP_PROTO(int channel, const char *caller),
182 TP_ARGS(channel, caller)
183);
184DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put,
185 TP_PROTO(int channel, const char *caller),
186 TP_ARGS(channel, caller)
187);
188DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree,
189 TP_PROTO(int channel, const char *caller),
190 TP_ARGS(channel, caller)
149); 191);
150 192
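The new getput event class records the channel id plus a caller string, so the natural call pattern at each reference site is to pass __func__; the exact argument the driver uses is not visible in this hunk:

	trace_gk20a_channel_get(ch->hw_chid, __func__);	/* on taking a reference */
	trace_gk20a_channel_put(ch->hw_chid, __func__);	/* on dropping it */
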
151TRACE_EVENT(gk20a_push_cmdbuf, 193TRACE_EVENT(gk20a_push_cmdbuf,