Diffstat (limited to 'drivers')

 drivers/gpu/nvgpu/gk20a/cde_gk20a.c          |   4
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 302
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h      |  32
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c |  58
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c        |  34
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         | 247
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h         |  15
 drivers/gpu/nvgpu/gk20a/gk20a.c              |   3
 drivers/gpu/nvgpu/gk20a/gk20a.h              |   9
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c           |  93
 drivers/gpu/nvgpu/gk20a/mc_gk20a.c           |  18
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c           |   4
 12 files changed, 635 insertions(+), 184 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 4a3076b5..b4fdfb44 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * Color decompression engine support
  *
- * Copyright (c) 2014, NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -74,7 +74,7 @@ __must_hold(&cde_app->mutex)
 	trace_gk20a_cde_remove_ctx(cde_ctx);
 
 	/* free the channel */
-	gk20a_free_channel(cde_ctx->ch, true);
+	gk20a_channel_close(ch);
 
 	/* ..then release mapped memory */
 	gk20a_deinit_cde_img(cde_ctx);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index c12f196d..5a71e874 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -42,8 +42,8 @@
 
 #define NVMAP_HANDLE_PARAM_SIZE 1
 
-static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
-static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
+static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
+static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
 
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e);
@@ -61,29 +61,33 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c,
 					bool add);
 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
 
-static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
+/* allocate GPU channel */
+static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
 {
 	struct channel_gk20a *ch = NULL;
-	int chid;
 
-	mutex_lock(&f->ch_inuse_mutex);
-	for (chid = 0; chid < f->num_channels; chid++) {
-		if (!f->channel[chid].in_use) {
-			f->channel[chid].in_use = true;
-			ch = &f->channel[chid];
-			break;
-		}
-	}
-	mutex_unlock(&f->ch_inuse_mutex);
+	mutex_lock(&f->free_chs_mutex);
+	if (!list_empty(&f->free_chs)) {
+		ch = list_first_entry(&f->free_chs, struct channel_gk20a,
+				      free_chs);
+		list_del(&ch->free_chs);
+		WARN_ON(atomic_read(&ch->ref_count));
+		WARN_ON(ch->referenceable);
+	}
+	mutex_unlock(&f->free_chs_mutex);
 
 	return ch;
 }
 
-static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
+static void free_channel(struct fifo_gk20a *f,
+			 struct channel_gk20a *ch)
 {
-	mutex_lock(&f->ch_inuse_mutex);
-	f->channel[c->hw_chid].in_use = false;
-	mutex_unlock(&f->ch_inuse_mutex);
+	trace_gk20a_release_used_channel(ch->hw_chid);
+	/* refcount is zero here and channel is in a freed/dead state */
+	mutex_lock(&f->free_chs_mutex);
+	/* add to head to increase visibility of timing-related bugs */
+	list_add(&ch->free_chs, &f->free_chs);
+	mutex_unlock(&f->free_chs_mutex);
 }
 
 int channel_gk20a_commit_va(struct channel_gk20a *c)
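[Note] The free list above recycles at the head on purpose: the most recently freed channel is handed out again first, so any stale, non-refcounted pointer to it misbehaves quickly instead of lying dormant. This is a minimal, standalone sketch of that LIFO free-list idea, not code from the patch; the "item" type and function names are hypothetical.

	#include <linux/list.h>
	#include <linux/mutex.h>

	struct item { struct list_head link; };

	static LIST_HEAD(free_items);
	static DEFINE_MUTEX(free_items_mutex);

	static struct item *item_alloc(void)
	{
		struct item *it = NULL;

		mutex_lock(&free_items_mutex);
		if (!list_empty(&free_items)) {
			it = list_first_entry(&free_items, struct item, link);
			list_del(&it->link);
		}
		mutex_unlock(&free_items_mutex);
		return it;
	}

	static void item_free(struct item *it)
	{
		mutex_lock(&free_items_mutex);
		/* head, not tail: LIFO reuse surfaces timing bugs sooner */
		list_add(&it->link, &free_items);
		mutex_unlock(&free_items_mutex);
	}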
@@ -361,6 +365,11 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
 	struct channel_gk20a_job *job, *n;
 	bool released_job_semaphore = false;
 
+	gk20a_dbg_fn("");
+
+	/* make sure new kickoffs are prevented */
+	ch->has_timedout = true;
+
 	/* ensure no fences are pending */
 	mutex_lock(&ch->submit_lock);
 	if (ch->sync)
@@ -416,6 +425,8 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
 			   bool finish,
 			   unsigned long finish_timeout)
 {
+	gk20a_dbg_fn("");
+
 	if (finish) {
 		int err = gk20a_channel_finish(ch, finish_timeout);
 		WARN_ON(err);
@@ -627,8 +638,9 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
 			(u32)(nsec >> 32);
 		ch->error_notifier->info32 = error;
 		ch->error_notifier->status = 0xffff;
+
 		gk20a_err(dev_from_gk20a(ch->g),
-			"error notifier set to %d for ch %d\n", error, ch->hw_chid);
+			"error notifier set to %d for ch %d", error, ch->hw_chid);
 	}
 }
 
@@ -643,7 +655,53 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
 	}
 }
 
-void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
+/* Returns delta of cyclic integers a and b. If a is ahead of b, delta
+ * is positive */
+static int cyclic_delta(int a, int b)
+{
+	return a - b;
+}
+
+static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
+{
+	int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
+	int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
+
+	/* wait until all stalling irqs are handled */
+	wait_event(g->sw_irq_stall_last_handled_wq,
+		   cyclic_delta(stall_irq_threshold,
+				atomic_read(&g->sw_irq_stall_last_handled))
+		   <= 0);
+
+	/* wait until all non-stalling irqs are handled */
+	wait_event(g->sw_irq_nonstall_last_handled_wq,
+		   cyclic_delta(nonstall_irq_threshold,
+				atomic_read(&g->sw_irq_nonstall_last_handled))
+		   <= 0);
+}
+
+static void gk20a_wait_until_counter_is_N(
+	struct channel_gk20a *ch, atomic_t *counter, int wait_value,
+	wait_queue_head_t *wq, const char *caller, const char *counter_name)
+{
+	while (true) {
+		if (wait_event_timeout(
+			    *wq,
+			    atomic_read(counter) == wait_value,
+			    msecs_to_jiffies(5000)) > 0)
+			break;
+
+		gk20a_warn(dev_from_gk20a(ch->g),
+			   "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
+			   caller, ch->hw_chid, counter_name,
+			   atomic_read(counter), wait_value);
+	}
+}
+
+
+
+/* call ONLY when no references to the channel exist: after the last put */
+static void gk20a_free_channel(struct channel_gk20a *ch)
 {
 	struct gk20a *g = ch->g;
 	struct fifo_gk20a *f = &g->fifo;
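[Note] cyclic_delta() compares interrupt counters that increment forever and eventually wrap; comparing the *difference* rather than the raw values keeps the test correct across the wrap point (the kernel is built with wrapping integer semantics). Below is a standalone illustration of the same idea, not part of the patch; it uses explicit unsigned arithmetic so the wraparound is well defined in portable C.

	#include <limits.h>
	#include <stdio.h>

	/* portable variant of cyclic_delta(): wraps via unsigned subtraction */
	static int cyclic_delta(int a, int b)
	{
		return (int)((unsigned int)a - (unsigned int)b);
	}

	int main(void)
	{
		int snapshot = INT_MAX;                            /* about to wrap */
		int counter = (int)((unsigned int)snapshot + 2u);  /* wrapped: negative */

		/* counter is numerically smaller than snapshot, yet "ahead" by 2 */
		printf("%d\n", cyclic_delta(counter, snapshot));   /* prints 2 */
		return 0;
	}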
@@ -654,13 +712,50 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 
 	gk20a_dbg_fn("");
 
+	WARN_ON(ch->g == NULL);
+
+	trace_gk20a_free_channel(ch->hw_chid);
+
+	/* prevent new kickoffs */
+	ch->has_timedout = true;
+	wmb();
+
+	/* wait until there's only our ref to the channel */
+	gk20a_wait_until_counter_is_N(
+		ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
+		__func__, "references");
+
+	/* wait until all pending interrupts for recently completed
+	 * jobs are handled */
+	gk20a_wait_for_deferred_interrupts(g);
+
+	/* prevent new refs */
+	spin_lock(&ch->ref_obtain_lock);
+	if (!ch->referenceable) {
+		spin_unlock(&ch->ref_obtain_lock);
+		gk20a_err(dev_from_gk20a(ch->g),
+			  "Extra %s() called to channel %u",
+			  __func__, ch->hw_chid);
+		return;
+	}
+	ch->referenceable = false;
+	spin_unlock(&ch->ref_obtain_lock);
+
+	/* matches with the initial reference in gk20a_open_new_channel() */
+	atomic_dec(&ch->ref_count);
+
+	/* wait until no more refs to the channel */
+	gk20a_wait_until_counter_is_N(
+		ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
+		__func__, "references");
+
 	/* if engine reset was deferred, perform it now */
 	mutex_lock(&f->deferred_reset_mutex);
 	if (g->fifo.deferred_reset_pending) {
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
 			  " deferred, running now");
-		gk20a_fifo_reset_engine(g, g->fifo.mmu_fault_engines);
-		g->fifo.mmu_fault_engines = 0;
+		gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines);
+		g->fifo.deferred_fault_engines = 0;
 		g->fifo.deferred_reset_pending = false;
 	}
 	mutex_unlock(&f->deferred_reset_mutex);
@@ -674,7 +769,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
 			timeout);
 
-	gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
+	gk20a_disable_channel(ch, !ch->has_timedout, timeout);
 
 	gk20a_free_error_notifiers(ch);
 
@@ -714,6 +809,10 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	spin_unlock(&ch->update_fn_lock);
 	cancel_work_sync(&ch->update_fn_work);
 
+	/* make sure we don't have deferred interrupts pending that
+	 * could still touch the channel */
+	gk20a_wait_for_deferred_interrupts(g);
+
 unbind:
 	if (gk20a_is_channel_marked_as_tsg(ch))
 		gk20a_tsg_unbind_channel(ch);
@@ -743,8 +842,66 @@ unbind:
 	mutex_unlock(&ch->dbg_s_lock);
 
 release:
+	/* make sure we catch accesses of unopened channels in case
+	 * there's non-refcounted channel pointers hanging around */
+	ch->g = NULL;
+	wmb();
+
 	/* ALWAYS last */
-	release_used_channel(f, ch);
+	free_channel(f, ch);
+}
+
+/* Try to get a reference to the channel. Return nonzero on success. If fails,
+ * the channel is dead or being freed elsewhere and you must not touch it.
+ *
+ * Always when a channel_gk20a pointer is seen and about to be used, a
+ * reference must be held to it - either by you or the caller, which should be
+ * documented well or otherwise clearly seen. This usually boils down to the
+ * file from ioctls directly, or an explicit get in exception handlers when the
+ * channel is found by a hw_chid.
+ *
+ * Most global functions in this file require a reference to be held by the
+ * caller.
+ */
+struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
+					 const char *caller) {
+	struct channel_gk20a *ret;
+
+	spin_lock(&ch->ref_obtain_lock);
+
+	if (likely(ch->referenceable)) {
+		atomic_inc(&ch->ref_count);
+		ret = ch;
+	} else
+		ret = NULL;
+
+	spin_unlock(&ch->ref_obtain_lock);
+
+	if (ret)
+		trace_gk20a_channel_get(ch->hw_chid, caller);
+
+	return ret;
+}
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
+{
+	trace_gk20a_channel_put(ch->hw_chid, caller);
+	atomic_dec(&ch->ref_count);
+	wake_up_all(&ch->ref_count_dec_wq);
+
+	/* More puts than gets. Channel is probably going to get
+	 * stuck. */
+	WARN_ON(atomic_read(&ch->ref_count) < 0);
+
+	/* Also, more puts than gets. ref_count can go to 0 only if
+	 * the channel is closing. Channel is probably going to get
+	 * stuck. */
+	WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
+}
+
+void gk20a_channel_close(struct channel_gk20a *ch)
+{
+	gk20a_free_channel(ch);
 }
 
 int gk20a_channel_release(struct inode *inode, struct file *filp)
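[Note] The comment block above states the contract the rest of the patch applies everywhere: get before touching, put when done. A minimal sketch of a typical caller follows; it assumes the driver headers for channel_gk20a and the get/put macros, and example_poke_channel() itself is a hypothetical name, not code from the patch.

	static void example_poke_channel(struct gk20a *g, int hw_chid)
	{
		struct channel_gk20a *ch =
			gk20a_channel_get(&g->fifo.channel[hw_chid]);

		if (!ch)
			return; /* dead or being freed; must not be touched */

		gk20a_channel_event(ch); /* safe: we hold a reference */

		gk20a_channel_put(ch);
	}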
@@ -758,14 +915,14 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
 
 	trace_gk20a_channel_release(dev_name(&g->dev->dev));
 
-	err = gk20a_busy(ch->g->dev);
+	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
 			ch->hw_chid);
 		return err;
 	}
-	gk20a_free_channel(ch, true);
-	gk20a_idle(ch->g->dev);
+	gk20a_channel_close(ch);
+	gk20a_idle(g->dev);
 
 	filp->private_data = NULL;
 	return 0;
@@ -808,22 +965,31 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	struct fifo_gk20a *f = &g->fifo;
 	struct channel_gk20a *ch;
 
-	ch = acquire_unused_channel(f);
+	gk20a_dbg_fn("");
+
+	ch = allocate_channel(f);
 	if (ch == NULL) {
 		/* TBD: we want to make this virtualizable */
 		gk20a_err(dev_from_gk20a(g), "out of hw chids");
 		return NULL;
 	}
 
+	trace_gk20a_open_new_channel(ch->hw_chid);
+
+	BUG_ON(ch->g);
 	ch->g = g;
 
 	if (g->ops.fifo.alloc_inst(g, ch)) {
-		ch->in_use = false;
+		ch->g = NULL;
+		free_channel(f, ch);
 		gk20a_err(dev_from_gk20a(g),
 			"failed to open gk20a channel, out of inst mem");
-
 		return NULL;
 	}
+
+	/* now the channel is in a limbo out of the free list but not marked as
+	 * alive and used (i.e. get-able) yet */
+
 	ch->pid = current->pid;
 
 	/* By default, channel is regular (non-TSG) channel */
@@ -854,6 +1020,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	spin_lock_init(&ch->update_fn_lock);
 	INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
 
+	/* Mark the channel alive, get-able, with 1 initial use
+	 * references. The initial reference will be decreased in
+	 * gk20a_free_channel() */
+	ch->referenceable = true;
+	atomic_set(&ch->ref_count, 1);
+	wmb();
+
 	return ch;
 }
 
@@ -1379,7 +1552,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	struct mapped_buffer_node **mapped_buffers = NULL;
 	int err = 0, num_mapped_buffers;
 
-	/* job needs reference to this vm */
+	/* job needs reference to this vm (released in channel_update) */
 	gk20a_vm_get(vm);
 
 	err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
@@ -1395,14 +1568,21 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		return -ENOMEM;
 	}
 
-	job->num_mapped_buffers = num_mapped_buffers;
-	job->mapped_buffers = mapped_buffers;
-	job->pre_fence = gk20a_fence_get(pre_fence);
-	job->post_fence = gk20a_fence_get(post_fence);
+	/* put() is done in gk20a_channel_update() when the job is done */
+	c = gk20a_channel_get(c);
 
-	mutex_lock(&c->jobs_lock);
-	list_add_tail(&job->list, &c->jobs);
-	mutex_unlock(&c->jobs_lock);
+	if (c) {
+		job->num_mapped_buffers = num_mapped_buffers;
+		job->mapped_buffers = mapped_buffers;
+		job->pre_fence = gk20a_fence_get(pre_fence);
+		job->post_fence = gk20a_fence_get(post_fence);
+
+		mutex_lock(&c->jobs_lock);
+		list_add_tail(&job->list, &c->jobs);
+		mutex_unlock(&c->jobs_lock);
+	} else {
+		return -ETIMEDOUT;
+	}
 
 	return 0;
 }
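[Note] The pattern here is one channel reference per in-flight job: taken in add_job, dropped in gk20a_channel_update() once the post-fence expires, so the channel cannot reach its free path while completions are still outstanding. A condensed sketch of the pairing, assuming the driver types; submit_job()/complete_job() are hypothetical wrappers, not functions from the patch.

	static int submit_job(struct channel_gk20a *c, struct channel_gk20a_job *job)
	{
		c = gk20a_channel_get(c);	/* one ref per in-flight job */
		if (!c)
			return -ETIMEDOUT;	/* channel already dying */

		mutex_lock(&c->jobs_lock);
		list_add_tail(&job->list, &c->jobs);
		mutex_unlock(&c->jobs_lock);
		return 0;
	}

	static void complete_job(struct channel_gk20a *c, struct channel_gk20a_job *job)
	{
		list_del_init(&job->list);
		kfree(job);
		gk20a_channel_put(c);		/* balances the get in submit_job() */
	}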
@@ -1412,13 +1592,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	struct vm_gk20a *vm = c->vm;
 	struct channel_gk20a_job *job, *n;
 
-	trace_gk20a_channel_update(c);
+	trace_gk20a_channel_update(c->hw_chid);
 
 	wake_up(&c->submit_wq);
 
 	mutex_lock(&c->submit_lock);
 	mutex_lock(&c->jobs_lock);
 	list_for_each_entry_safe(job, n, &c->jobs, list) {
+		struct gk20a *g = c->g;
+
 		bool completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed)
 			break;
@@ -1434,12 +1616,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 		gk20a_fence_put(job->pre_fence);
 		gk20a_fence_put(job->post_fence);
 
-		/* job is done. release its reference to vm */
+		/* job is done. release its vm reference (taken in add_job) */
 		gk20a_vm_put(vm);
+		/* another bookkeeping taken in add_job. caller must hold a ref
+		 * so this wouldn't get freed here. */
+		gk20a_channel_put(c);
 
 		list_del_init(&job->list);
 		kfree(job);
-		gk20a_idle(c->g->dev);
+		gk20a_idle(g->dev);
 	}
 
 	/*
@@ -1719,10 +1904,13 @@ clean_up:
 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 {
 	struct channel_gk20a *c = g->fifo.channel+chid;
-	c->g = g;
-	c->in_use = false;
+	c->g = NULL;
 	c->hw_chid = chid;
 	c->bound = false;
+	spin_lock_init(&c->ref_obtain_lock);
+	atomic_set(&c->ref_count, 0);
+	c->referenceable = false;
+	init_waitqueue_head(&c->ref_count_dec_wq);
 	mutex_init(&c->ioctl_lock);
 	mutex_init(&c->jobs_lock);
 	mutex_init(&c->submit_lock);
@@ -1733,6 +1921,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
 	INIT_LIST_HEAD(&c->dbg_s_list);
 	mutex_init(&c->dbg_s_lock);
+	list_add(&c->free_chs, &g->fifo.free_chs);
 
 	return 0;
 }
@@ -2066,8 +2255,7 @@ int gk20a_channel_suspend(struct gk20a *g)
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *ch = &f->channel[chid];
-		if (ch->in_use) {
-
+		if (gk20a_channel_get(ch)) {
 			gk20a_dbg_info("suspend channel %d", chid);
 			/* disable channel */
 			g->ops.fifo.disable_channel(ch);
@@ -2079,6 +2267,8 @@ int gk20a_channel_suspend(struct gk20a *g)
 			flush_work(&ch->update_fn_work);
 
 			channels_in_use = true;
+
+			gk20a_channel_put(ch);
 		}
 	}
 
@@ -2086,8 +2276,10 @@ int gk20a_channel_suspend(struct gk20a *g)
 		g->ops.fifo.update_runlist(g, 0, ~0, false, true);
 
 		for (chid = 0; chid < f->num_channels; chid++) {
-			if (f->channel[chid].in_use)
+			if (gk20a_channel_get(&f->channel[chid])) {
 				g->ops.fifo.unbind_channel(&f->channel[chid]);
+				gk20a_channel_put(&f->channel[chid]);
+			}
 		}
 	}
 
@@ -2095,8 +2287,6 @@ int gk20a_channel_suspend(struct gk20a *g)
 	return 0;
 }
 
-/* in this context the "channel" is the host1x channel which
- * maps to *all* gk20a channels */
 int gk20a_channel_resume(struct gk20a *g)
 {
 	struct fifo_gk20a *f = &g->fifo;
@@ -2106,10 +2296,11 @@ int gk20a_channel_resume(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		if (f->channel[chid].in_use) {
+		if (gk20a_channel_get(&f->channel[chid])) {
 			gk20a_dbg_info("resume channel %d", chid);
 			g->ops.fifo.bind_channel(&f->channel[chid]);
 			channels_in_use = true;
+			gk20a_channel_put(&f->channel[chid]);
 		}
 	}
 
@@ -2129,10 +2320,11 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g)
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *c = g->fifo.channel+chid;
-		if (c->in_use) {
+		if (gk20a_channel_get(c)) {
 			gk20a_channel_event(c);
 			wake_up_interruptible_all(&c->semaphore_wq);
 			gk20a_channel_update(c, 0);
+			gk20a_channel_put(c);
 		}
 	}
 }
@@ -2225,10 +2417,18 @@ long gk20a_channel_ioctl(struct file *filp,
 		return -EFAULT;
 	}
 
+	/* take a ref or return timeout if channel refs can't be taken */
+	ch = gk20a_channel_get(ch);
+	if (!ch)
+		return -ETIMEDOUT;
+
 	/* protect our sanity for threaded userspace - most of the channel is
 	 * not thread safe */
 	mutex_lock(&ch->ioctl_lock);
 
+	/* this ioctl call keeps a ref to the file which keeps a ref to the
+	 * channel */
+
 	switch (cmd) {
 	case NVGPU_IOCTL_CHANNEL_OPEN:
 		err = gk20a_channel_open_ioctl(ch->g,
@@ -2449,9 +2649,11 @@ long gk20a_channel_ioctl(struct file *filp,
 	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
 		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
 
-	gk20a_dbg_fn("end");
-
 	mutex_unlock(&ch->ioctl_lock);
 
+	gk20a_channel_put(ch);
+
+	gk20a_dbg_fn("end");
+
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f022fe36..2ea5b4be 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -19,12 +19,13 @@
 #define CHANNEL_GK20A_H
 
 #include <linux/log2.h>
-#include <linux/slab.h>
-#include <linux/wait.h>
 #include <linux/mutex.h>
-#include <uapi/linux/nvgpu.h>
 #include <linux/poll.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <uapi/linux/nvgpu.h>
 
 struct gk20a;
 struct gr_gk20a;
@@ -77,8 +78,15 @@ struct channel_gk20a_poll_events {
 
 /* this is the priv element of struct nvhost_channel */
 struct channel_gk20a {
-	struct gk20a *g;
-	bool in_use;
+	struct gk20a *g; /* set only when channel is active */
+
+	struct list_head free_chs;
+
+	spinlock_t ref_obtain_lock;
+	bool referenceable;
+	atomic_t ref_count;
+	wait_queue_head_t ref_count_dec_wq;
+
 	int hw_chid;
 	bool bound;
 	bool first_init;
@@ -171,7 +179,10 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
 }
 int channel_gk20a_commit_va(struct channel_gk20a *c);
 int gk20a_init_channel_support(struct gk20a *, u32 chid);
-void gk20a_free_channel(struct channel_gk20a *ch, bool finish);
+
+/* must be inside gk20a_busy()..gk20a_idle() */
+void gk20a_channel_close(struct channel_gk20a *ch);
+
 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
 		u32 timeout_delta_ms);
 void gk20a_disable_channel(struct channel_gk20a *ch,
@@ -202,6 +213,15 @@ void gk20a_channel_event(struct channel_gk20a *ch);
 
 void gk20a_init_channel(struct gpu_ops *gops);
 
+/* returns ch if reference was obtained */
+struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch,
+						      const char *caller);
+#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__)
+
+
+void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
+#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
+
 int gk20a_wait_channel_idle(struct channel_gk20a *ch);
 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g);
 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
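[Note] Wrapping the real function in a macro that passes __func__ is a common kernel trick: every get/put site is attributed to its caller in the trace stream without callers having to name themselves. A standalone sketch of the same idiom follows; the "resource" names are hypothetical and only the macro technique is taken from the patch.

	#include <stdio.h>

	static void _resource_get(const char *caller)
	{
		printf("get from %s\n", caller); /* stand-in for a tracepoint */
	}
	#define resource_get() _resource_get(__func__)

	void some_driver_entry_point(void)
	{
		resource_get(); /* traces as "some_driver_entry_point" */
	}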
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 8cc852c7..7a707fbd 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 
 static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
 {
-	struct channel_gk20a *ch20a = priv;
-	gk20a_channel_update(ch20a, nr_completed);
+	struct channel_gk20a *ch = priv;
+	struct gk20a *g = ch->g;
+
+	/* need busy for possible channel deletion */
+	if (gk20a_busy(ch->g->dev)) {
+		gk20a_err(dev_from_gk20a(ch->g),
+			  "failed to busy while syncpt update");
+		/* Last gk20a_idle()s are in channel_update, so we shouldn't
+		 * get here. If we do, the channel is badly broken now */
+		return;
+	}
+
+	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
+	gk20a_channel_update(ch, nr_completed);
+	gk20a_channel_put(ch);
+
+	gk20a_idle(g->dev);
 }
 
 static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
@@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
 
 	if (register_irq) {
-		err = nvhost_intr_register_notifier(sp->host1x_pdev,
-				sp->id, thresh,
-				gk20a_channel_syncpt_update, c);
-
-		/* Adding interrupt action should never fail. A proper error
-		 * handling here would require us to decrement the syncpt max
-		 * back to its original value. */
-		WARN(err, "failed to set submit complete interrupt");
+		err = gk20a_busy(c->g->dev);
+		if (err)
+			gk20a_err(dev_from_gk20a(c->g),
+				"failed to add syncpt interrupt notifier for channel %d",
+				c->hw_chid);
+		else {
+			struct channel_gk20a *referenced = gk20a_channel_get(c);
+
+			WARN_ON(!referenced);
+			gk20a_idle(c->g->dev);
+
+			if (referenced) {
+				/* note: channel_put() is in
+				 * gk20a_channel_syncpt_update() */
+
+				err = nvhost_intr_register_notifier(
+					sp->host1x_pdev,
+					sp->id, thresh,
+					gk20a_channel_syncpt_update, c);
+				if (err)
+					gk20a_channel_put(referenced);
+
+				/* Adding interrupt action should
+				 * never fail. A proper error handling
+				 * here would require us to decrement
+				 * the syncpt max back to its original
+				 * value. */
+				WARN(err,
+				     "failed to set submit complete interrupt");
+			}
+		}
 	}
 
 	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 0f1c31dd..bda0dab0 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -36,6 +36,7 @@ static struct platform_device *gk20a_device;
 
 struct ch_state {
 	int pid;
+	int refs;
 	u8 inst_block[0];
};
 
@@ -118,9 +119,10 @@ static void gk20a_debug_show_channel(struct gk20a *g,
 	syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
 	syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());
 
-	gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid,
+	gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
 			g->dev->name,
-			ch_state->pid);
+			ch_state->pid,
+			ch_state->refs);
 	gk20a_debug_output(o, "%s in use %s %s\n",
 			ccsr_channel_enable_v(channel) ? "" : "not",
 			ccsr_chan_status_str[status],
@@ -231,16 +233,30 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
 	}
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		if (f->channel[chid].in_use)
-			ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL);
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (gk20a_channel_get(ch)) {
+			ch_state[chid] =
+				kmalloc(sizeof(struct ch_state) +
+					ram_in_alloc_size_v(), GFP_KERNEL);
+			/* ref taken stays to below loop with
+			 * successful allocs */
+			if (!ch_state[chid])
+				gk20a_channel_put(ch);
+		}
 	}
 
 	for (chid = 0; chid < f->num_channels; chid++) {
-		if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) {
-			ch_state[chid]->pid = f->channel[chid].pid;
-			memcpy(&ch_state[chid]->inst_block[0],
-				f->channel[chid].inst_block.cpu_va,
-				ram_in_alloc_size_v());
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (ch_state[chid]) {
+			if (ch->inst_block.cpu_va) {
+				ch_state[chid]->pid = ch->pid;
+				ch_state[chid]->refs =
+					atomic_read(&ch->ref_count);
+				memcpy(&ch_state[chid]->inst_block[0],
+				       ch->inst_block.cpu_va,
+				       ram_in_alloc_size_v());
+			}
+			gk20a_channel_put(ch);
 		}
 	}
 	for (chid = 0; chid < f->num_channels; chid++) {
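[Note] The dump uses a two-pass snapshot: pass 1 pins every live channel (dropping the pin immediately if the allocation fails, since there will be nothing to fill in), pass 2 copies the state and unpins. A condensed sketch of that shape, assuming the driver types; the "snap" struct and snapshot_channels() name are hypothetical.

	struct snap { int pid; int refs; };

	static void snapshot_channels(struct fifo_gk20a *f, struct snap **state)
	{
		int chid;

		for (chid = 0; chid < f->num_channels; chid++)
			if (gk20a_channel_get(&f->channel[chid])) {
				state[chid] = kmalloc(sizeof(**state), GFP_KERNEL);
				if (!state[chid])	/* nothing to fill later */
					gk20a_channel_put(&f->channel[chid]);
			}

		for (chid = 0; chid < f->num_channels; chid++)
			if (state[chid]) {
				state[chid]->pid = f->channel[chid].pid;
				state[chid]->refs =
					atomic_read(&f->channel[chid].ref_count);
				gk20a_channel_put(&f->channel[chid]);
			}
	}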
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 56b954a9..4ef310b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -515,6 +515,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 	init_runlist(g, f);
 
+	INIT_LIST_HEAD(&f->free_chs);
+	mutex_init(&f->free_chs_mutex);
+
 	for (chid = 0; chid < f->num_channels; chid++) {
 		f->channel[chid].userd_cpu_va =
 			f->userd.cpu_va + chid * f->userd_entry_size;
@@ -527,7 +530,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 		gk20a_init_channel_support(g, chid);
 		gk20a_init_tsg_support(g, chid);
 	}
-	mutex_init(&f->ch_inuse_mutex);
 	mutex_init(&f->tsg_inuse_mutex);
 
 	f->remove_support = gk20a_remove_fifo_support;
@@ -637,6 +639,7 @@ int gk20a_init_fifo_support(struct gk20a *g)
 	return err;
 }
 
+/* return with a reference to the channel, caller must put it back */
 static struct channel_gk20a *
 channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
 {
@@ -644,10 +647,16 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
 	if (unlikely(!f->channel))
 		return NULL;
 	for (ci = 0; ci < f->num_channels; ci++) {
-		struct channel_gk20a *c = f->channel+ci;
-		if (c->inst_block.cpu_va &&
-		    (inst_ptr == gk20a_mem_phys(&c->inst_block)))
-			return f->channel+ci;
+		struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]);
+		/* only alive channels are searched */
+		if (!ch)
+			continue;
+
+		if (ch->inst_block.cpu_va &&
+		    (inst_ptr == gk20a_mem_phys(&ch->inst_block)))
+			return ch;
+
+		gk20a_channel_put(ch);
 	}
 	return NULL;
 }
@@ -803,6 +812,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
 	return true;
 }
 
+/* caller must hold a channel reference */
 static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
@@ -854,14 +864,38 @@ static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
 		"TSG %d generated a mmu fault", tsg->tsgid);
 
 	mutex_lock(&tsg->ch_list_lock);
-	list_for_each_entry(ch, &tsg->ch_list, ch_entry)
-		ret = gk20a_fifo_set_ctx_mmu_error(g, ch);
+	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+		if (gk20a_channel_get(ch)) {
+			if (!gk20a_fifo_set_ctx_mmu_error(g, ch))
+				ret = false;
+			gk20a_channel_put(ch);
+		}
+	}
 	mutex_unlock(&tsg->ch_list_lock);
 
 	return ret;
 }
 
-static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
+static void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid)
+{
+	struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
+	struct channel_gk20a *ch;
+
+	mutex_lock(&tsg->ch_list_lock);
+	list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_abort(ch);
+			gk20a_channel_put(ch);
+		}
+	}
+	mutex_unlock(&tsg->ch_list_lock);
+}
+
+static bool gk20a_fifo_handle_mmu_fault(
+	struct gk20a *g,
+	u32 mmu_fault_engines, /* queried from HW if 0 */
+	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0 */
+	bool id_is_tsg)
 {
 	bool fake_fault;
 	unsigned long fault_id;
@@ -894,10 +928,8 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
 		gr_gpfifo_ctl_semaphore_access_f(0));
 
-	/* If we have recovery in progress, MMU fault id is invalid */
-	if (g->fifo.mmu_fault_engines) {
-		fault_id = g->fifo.mmu_fault_engines;
-		g->fifo.mmu_fault_engines = 0;
+	if (mmu_fault_engines) {
+		fault_id = mmu_fault_engines;
 		fake_fault = true;
 	} else {
 		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
@@ -914,6 +946,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		struct fifo_mmu_fault_info_gk20a f;
 		struct channel_gk20a *ch = NULL;
 		struct tsg_gk20a *tsg = NULL;
+		struct channel_gk20a *referenced_channel = 0;
 		/* read and parse engine status */
 		u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id));
 		u32 ctx_status = fifo_engine_status_ctx_status_v(status);
@@ -953,22 +986,34 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		/* get the channel/TSG */
 		if (fake_fault) {
 			/* use next_id if context load is failing */
-			u32 id = (ctx_status ==
-				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
-				fifo_engine_status_next_id_v(status) :
-				fifo_engine_status_id_v(status);
-			u32 type = (ctx_status ==
-				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
-				fifo_engine_status_next_id_type_v(status) :
-				fifo_engine_status_id_type_v(status);
+			u32 id, type;
+
+			if (hw_id == ~(u32)0) {
+				id = (ctx_status ==
+					fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+					fifo_engine_status_next_id_v(status) :
+					fifo_engine_status_id_v(status);
+				type = (ctx_status ==
+					fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+					fifo_engine_status_next_id_type_v(status) :
+					fifo_engine_status_id_type_v(status);
+			} else {
+				id = hw_id;
+				type = id_is_tsg ?
+					fifo_engine_status_id_type_tsgid_v() :
+					fifo_engine_status_id_type_chid_v();
+			}
 
 			if (type == fifo_engine_status_id_type_tsgid_v())
 				tsg = &g->fifo.tsg[id];
-			else if (type == fifo_engine_status_id_type_chid_v())
+			else if (type == fifo_engine_status_id_type_chid_v()) {
 				ch = &g->fifo.channel[id];
+				referenced_channel = gk20a_channel_get(ch);
+			}
 		} else {
 			/* read channel based on instruction pointer */
 			ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
+			referenced_channel = ch;
 		}
 
 		if (ch && gk20a_is_channel_marked_as_tsg(ch))
@@ -977,7 +1022,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		/* check if engine reset should be deferred */
 		if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g,
 				engine_id, &f, fake_fault)) {
-			g->fifo.mmu_fault_engines = fault_id;
+			g->fifo.deferred_fault_engines = fault_id;
 
 			/* handled during channel free */
 			g->fifo.deferred_reset_pending = true;
@@ -988,19 +1033,31 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 		 * syncpoints */
 
 		if (tsg) {
-			struct channel_gk20a *ch = NULL;
 			if (!g->fifo.deferred_reset_pending)
 				verbose =
 					gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry)
-				gk20a_channel_abort(ch);
-			mutex_unlock(&tsg->ch_list_lock);
+
+			gk20a_fifo_abort_tsg(g, ch->tsgid);
+
+			/* put back the ref taken early above */
+			if (referenced_channel) {
+				gk20a_channel_put(ch);
+			} else {
+				gk20a_err(dev_from_gk20a(g),
+					"mmu error in freed tsg channel %d on tsgid %d",
+					ch->hw_chid, ch->tsgid);
+			}
 		} else if (ch) {
-			if (!g->fifo.deferred_reset_pending)
-				verbose =
-					gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
-			gk20a_channel_abort(ch);
+			if (referenced_channel) {
+				if (!g->fifo.deferred_reset_pending)
+					verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
+				gk20a_channel_abort(ch);
+				gk20a_channel_put(ch);
+			} else {
+				gk20a_err(dev_from_gk20a(g),
+					"mmu error in freed channel %d",
+					ch->hw_chid);
+			}
 		} else if (f.inst_ptr ==
 				gk20a_mem_phys(&g->mm.bar1.inst_block)) {
 			gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
@@ -1133,46 +1190,69 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg)
 
 void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 {
-	u32 engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
+	u32 engines;
+
+	/* stop context switching to prevent engine assignments from
+	   changing until channel is recovered */
+	mutex_lock(&g->dbg_sessions_lock);
+	gr_gk20a_disable_ctxsw(g);
+
+	engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
+
 	if (engines)
-		gk20a_fifo_recover(g, engines, verbose);
+		gk20a_fifo_recover(g, engines, hw_chid, false, verbose);
 	else {
-		struct channel_gk20a *ch =
-			g->fifo.channel + hw_chid;
+		struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
 
-		gk20a_channel_abort(ch);
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_abort(ch);
 
-		if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
-			gk20a_debug_dump(g->dev);
+			if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch))
+				gk20a_debug_dump(g->dev);
+
+			gk20a_channel_put(ch);
+		}
 	}
+
+	gr_gk20a_enable_ctxsw(g);
+	mutex_unlock(&g->dbg_sessions_lock);
 }
 
 void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
 {
-	u32 engines = gk20a_fifo_engines_on_id(g, tsgid, true);
+	u32 engines;
+
+	/* stop context switching to prevent engine assignments from
+	   changing until TSG is recovered */
+	mutex_lock(&g->dbg_sessions_lock);
+	gr_gk20a_disable_ctxsw(g);
+
+	engines = gk20a_fifo_engines_on_id(g, tsgid, true);
+
 	if (engines)
-		gk20a_fifo_recover(g, engines, verbose);
+		gk20a_fifo_recover(g, engines, tsgid, true, verbose);
 	else {
 		struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
-		struct channel_gk20a *ch;
 
 		if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg))
 			gk20a_debug_dump(g->dev);
 
-		mutex_lock(&tsg->ch_list_lock);
-		list_for_each_entry(ch, &tsg->ch_list, ch_entry)
-			gk20a_channel_abort(ch);
-		mutex_unlock(&tsg->ch_list_lock);
+		gk20a_fifo_abort_tsg(g, tsgid);
 	}
+
+	gr_gk20a_enable_ctxsw(g);
+	mutex_unlock(&g->dbg_sessions_lock);
 }
 
 void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
-			bool verbose)
+			u32 hw_id, bool id_is_tsg,
+			bool verbose)
 {
 	unsigned long engine_id, i;
 	unsigned long _engine_ids = __engine_ids;
 	unsigned long engine_ids = 0;
 	u32 val;
+	u32 mmu_fault_engines = 0;
 
 	if (verbose)
 		gk20a_debug_dump(g->dev);
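[Note] Both recovery paths now bracket the engine query and the teardown with a disable/enable of context switching under dbg_sessions_lock, so the engine-to-channel assignment cannot shift between "which engines run this id" and the actual recovery. A skeletal restatement of just that bracketing, assuming the driver functions named in the hunk; recover_locked() is a hypothetical name.

	void recover_locked(struct gk20a *g, u32 hw_chid, bool verbose)
	{
		u32 engines;

		mutex_lock(&g->dbg_sessions_lock);
		gr_gk20a_disable_ctxsw(g);	/* engine<->channel map is now stable */

		engines = gk20a_fifo_engines_on_id(g, hw_chid, false);
		if (engines)
			gk20a_fifo_recover(g, engines, hw_chid, false, verbose);

		gr_gk20a_enable_ctxsw(g);
		mutex_unlock(&g->dbg_sessions_lock);
	}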
@@ -1181,7 +1261,6 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
 	g->ops.ltc.flush(g);
 
 	/* store faulted engines in advance */
-	g->fifo.mmu_fault_engines = 0;
 	for_each_set_bit(engine_id, &_engine_ids, 32) {
 		u32 ref_type;
 		u32 ref_id;
@@ -1196,11 +1275,10 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
 			gk20a_fifo_get_faulty_id_type(g, i, &id, &type);
 			if (ref_type == type && ref_id == id) {
 				engine_ids |= BIT(i);
-				g->fifo.mmu_fault_engines |=
+				mmu_fault_engines |=
 					BIT(gk20a_engine_id_to_mmu_id(i));
 			}
 		}
-
 	}
 
 	/*
1206 | /* | 1284 | /* |
@@ -1214,7 +1292,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | |||
1214 | fifo_intr_0_sched_error_reset_f()); | 1292 | fifo_intr_0_sched_error_reset_f()); |
1215 | 1293 | ||
1216 | g->ops.fifo.trigger_mmu_fault(g, engine_ids); | 1294 | g->ops.fifo.trigger_mmu_fault(g, engine_ids); |
1217 | gk20a_fifo_handle_mmu_fault(g); | 1295 | gk20a_fifo_handle_mmu_fault(g, engine_ids, hw_id, id_is_tsg); |
1218 | 1296 | ||
1219 | val = gk20a_readl(g, fifo_intr_en_0_r()); | 1297 | val = gk20a_readl(g, fifo_intr_en_0_r()); |
1220 | val |= fifo_intr_en_0_mmu_fault_f(1) | 1298 | val |= fifo_intr_en_0_mmu_fault_f(1) |
@@ -1222,25 +1300,32 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, | |||
1222 | gk20a_writel(g, fifo_intr_en_0_r(), val); | 1300 | gk20a_writel(g, fifo_intr_en_0_r(), val); |
1223 | } | 1301 | } |
1224 | 1302 | ||
1303 | /* force reset channel and tsg (if it's part of one) */ | ||
1225 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) | 1304 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) |
1226 | { | 1305 | { |
1227 | struct tsg_gk20a *tsg = NULL; | 1306 | struct tsg_gk20a *tsg = NULL; |
1228 | struct channel_gk20a *ch_tsg = NULL; | 1307 | struct channel_gk20a *ch_tsg = NULL; |
1308 | struct gk20a *g = ch->g; | ||
1229 | 1309 | ||
1230 | if (gk20a_is_channel_marked_as_tsg(ch)) { | 1310 | if (gk20a_is_channel_marked_as_tsg(ch)) { |
1231 | tsg = &ch->g->fifo.tsg[ch->hw_chid]; | 1311 | tsg = &g->fifo.tsg[ch->hw_chid]; |
1232 | 1312 | ||
1233 | mutex_lock(&tsg->ch_list_lock); | 1313 | mutex_lock(&tsg->ch_list_lock); |
1314 | |||
1234 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { | 1315 | list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { |
1235 | gk20a_set_error_notifier(ch_tsg, | 1316 | if (gk20a_channel_get(ch_tsg)) { |
1236 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | 1317 | gk20a_set_error_notifier(ch_tsg, |
1318 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | ||
1319 | gk20a_channel_put(ch_tsg); | ||
1320 | } | ||
1237 | } | 1321 | } |
1322 | |||
1238 | mutex_unlock(&tsg->ch_list_lock); | 1323 | mutex_unlock(&tsg->ch_list_lock); |
1239 | gk20a_fifo_recover_tsg(ch->g, ch->tsgid, verbose); | 1324 | gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); |
1240 | } else { | 1325 | } else { |
1241 | gk20a_set_error_notifier(ch, | 1326 | gk20a_set_error_notifier(ch, |
1242 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); | 1327 | NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); |
1243 | gk20a_fifo_recover_ch(ch->g, ch->hw_chid, verbose); | 1328 | gk20a_fifo_recover_ch(g, ch->hw_chid, verbose); |
1244 | } | 1329 | } |
1245 | 1330 | ||
1246 | return 0; | 1331 | return 0; |
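
The hunk above shows the general shape of the new referencing discipline: code that touches a channel it does not own must first take a reference, and a failed gk20a_channel_get() means the channel is mid-teardown and is simply skipped rather than dereferenced. A minimal sketch of the pattern, using the driver's own types and helpers:

/* Notify every live channel in a TSG; channels whose refcount has
 * already dropped to zero are being freed and are skipped. */
static void example_notify_tsg(struct tsg_gk20a *tsg, u32 error)
{
        struct channel_gk20a *ch;

        mutex_lock(&tsg->ch_list_lock);
        list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
                if (!gk20a_channel_get(ch))
                        continue;       /* channel is being torn down */
                gk20a_set_error_notifier(ch, error);
                gk20a_channel_put(ch);
        }
        mutex_unlock(&tsg->ch_list_lock);
}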
@@ -1300,11 +1385,14 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
1300 | struct channel_gk20a *ch = &f->channel[id]; | 1385 | struct channel_gk20a *ch = &f->channel[id]; |
1301 | 1386 | ||
1302 | if (non_chid) { | 1387 | if (non_chid) { |
1303 | gk20a_fifo_recover(g, BIT(engine_id), true); | 1388 | gk20a_fifo_recover(g, BIT(engine_id), id, true, true); |
1304 | ret = true; | 1389 | ret = true; |
1305 | goto err; | 1390 | goto err; |
1306 | } | 1391 | } |
1307 | 1392 | ||
1393 | if (!gk20a_channel_get(ch)) | ||
1394 | goto err; | ||
1395 | |||
1308 | if (gk20a_channel_update_and_check_timeout(ch, | 1396 | if (gk20a_channel_update_and_check_timeout(ch, |
1309 | GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { | 1397 | GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { |
1310 | gk20a_set_error_notifier(ch, | 1398 | gk20a_set_error_notifier(ch, |
@@ -1313,7 +1401,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
1313 | "fifo sched ctxsw timeout error:" | 1401 | "fifo sched ctxsw timeout error:" |
1314 | "engine = %u, ch = %d", engine_id, id); | 1402 | "engine = %u, ch = %d", engine_id, id); |
1315 | gk20a_gr_debug_dump(g->dev); | 1403 | gk20a_gr_debug_dump(g->dev); |
1316 | gk20a_fifo_recover(g, BIT(engine_id), | 1404 | gk20a_fifo_recover(g, BIT(engine_id), id, false, |
1317 | ch->timeout_debug_dump); | 1405 | ch->timeout_debug_dump); |
1318 | ret = true; | 1406 | ret = true; |
1319 | } else { | 1407 | } else { |
@@ -1324,6 +1412,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) | |||
1324 | id); | 1412 | id); |
1325 | ret = false; | 1413 | ret = false; |
1326 | } | 1414 | } |
1415 | gk20a_channel_put(ch); | ||
1327 | return ret; | 1416 | return ret; |
1328 | } | 1417 | } |
1329 | 1418 | ||
@@ -1336,7 +1425,7 @@ err: | |||
1336 | 1425 | ||
1337 | static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) | 1426 | static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) |
1338 | { | 1427 | { |
1339 | bool print_channel_reset_log = false, reset_engine = false; | 1428 | bool print_channel_reset_log = false; |
1340 | struct device *dev = dev_from_gk20a(g); | 1429 | struct device *dev = dev_from_gk20a(g); |
1341 | u32 handled = 0; | 1430 | u32 handled = 0; |
1342 | 1431 | ||
@@ -1367,8 +1456,8 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) | |||
1367 | } | 1456 | } |
1368 | 1457 | ||
1369 | if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { | 1458 | if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { |
1370 | print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g); | 1459 | print_channel_reset_log = |
1371 | reset_engine = true; | 1460 | gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false); |
1372 | handled |= fifo_intr_0_mmu_fault_pending_f(); | 1461 | handled |= fifo_intr_0_mmu_fault_pending_f(); |
1373 | } | 1462 | } |
1374 | 1463 | ||
@@ -1452,9 +1541,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, | |||
1452 | == fifo_pbdma_status_id_type_chid_v()) { | 1541 | == fifo_pbdma_status_id_type_chid_v()) { |
1453 | struct channel_gk20a *ch = &f->channel[id]; | 1542 | struct channel_gk20a *ch = &f->channel[id]; |
1454 | 1543 | ||
1455 | gk20a_set_error_notifier(ch, | 1544 | if (gk20a_channel_get(ch)) { |
1456 | NVGPU_CHANNEL_PBDMA_ERROR); | 1545 | gk20a_set_error_notifier(ch, |
1457 | gk20a_fifo_recover_ch(g, id, true); | 1546 | NVGPU_CHANNEL_PBDMA_ERROR); |
1547 | gk20a_fifo_recover_ch(g, id, true); | ||
1548 | gk20a_channel_put(ch); | ||
1549 | } | ||
1458 | } else if (fifo_pbdma_status_id_type_v(status) | 1550 | } else if (fifo_pbdma_status_id_type_v(status) |
1459 | == fifo_pbdma_status_id_type_tsgid_v()) { | 1551 | == fifo_pbdma_status_id_type_tsgid_v()) { |
1460 | struct tsg_gk20a *tsg = &f->tsg[id]; | 1552 | struct tsg_gk20a *tsg = &f->tsg[id]; |
@@ -1462,8 +1554,11 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, | |||
1462 | 1554 | ||
1463 | mutex_lock(&tsg->ch_list_lock); | 1555 | mutex_lock(&tsg->ch_list_lock); |
1464 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | 1556 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { |
1465 | gk20a_set_error_notifier(ch, | 1557 | if (gk20a_channel_get(ch)) { |
1466 | NVGPU_CHANNEL_PBDMA_ERROR); | 1558 | gk20a_set_error_notifier(ch, |
1559 | NVGPU_CHANNEL_PBDMA_ERROR); | ||
1560 | gk20a_channel_put(ch); | ||
1561 | } | ||
1467 | } | 1562 | } |
1468 | mutex_unlock(&tsg->ch_list_lock); | 1563 | mutex_unlock(&tsg->ch_list_lock); |
1469 | gk20a_fifo_recover_tsg(g, id, true); | 1564 | gk20a_fifo_recover_tsg(g, id, true); |
@@ -1559,6 +1654,8 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) | |||
1559 | + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | 1654 | + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); |
1560 | u32 ret = 0; | 1655 | u32 ret = 0; |
1561 | 1656 | ||
1657 | gk20a_dbg_fn("%d", id); | ||
1658 | |||
1562 | /* issue preempt */ | 1659 | /* issue preempt */ |
1563 | if (is_tsg) | 1660 | if (is_tsg) |
1564 | gk20a_writel(g, fifo_preempt_r(), | 1661 | gk20a_writel(g, fifo_preempt_r(), |
@@ -1569,6 +1666,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) | |||
1569 | fifo_preempt_chid_f(id) | | 1666 | fifo_preempt_chid_f(id) | |
1570 | fifo_preempt_type_channel_f()); | 1667 | fifo_preempt_type_channel_f()); |
1571 | 1668 | ||
1669 | gk20a_dbg_fn("%d", id); | ||
1572 | /* wait for preempt */ | 1670 | /* wait for preempt */ |
1573 | ret = -EBUSY; | 1671 | ret = -EBUSY; |
1574 | do { | 1672 | do { |
@@ -1583,6 +1681,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) | |||
1583 | } while (time_before(jiffies, end_jiffies) || | 1681 | } while (time_before(jiffies, end_jiffies) || |
1584 | !tegra_platform_is_silicon()); | 1682 | !tegra_platform_is_silicon()); |
1585 | 1683 | ||
1684 | gk20a_dbg_fn("%d", id); | ||
1586 | if (ret) { | 1685 | if (ret) { |
1587 | if (is_tsg) { | 1686 | if (is_tsg) { |
1588 | struct tsg_gk20a *tsg = &g->fifo.tsg[id]; | 1687 | struct tsg_gk20a *tsg = &g->fifo.tsg[id]; |
@@ -1593,8 +1692,11 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) | |||
1593 | 1692 | ||
1594 | mutex_lock(&tsg->ch_list_lock); | 1693 | mutex_lock(&tsg->ch_list_lock); |
1595 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { | 1694 | list_for_each_entry(ch, &tsg->ch_list, ch_entry) { |
1695 | if (!gk20a_channel_get(ch)) | ||
1696 | continue; | ||
1596 | gk20a_set_error_notifier(ch, | 1697 | gk20a_set_error_notifier(ch, |
1597 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); | 1698 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); |
1699 | gk20a_channel_put(ch); | ||
1598 | } | 1700 | } |
1599 | mutex_unlock(&tsg->ch_list_lock); | 1701 | mutex_unlock(&tsg->ch_list_lock); |
1600 | gk20a_fifo_recover_tsg(g, id, true); | 1702 | gk20a_fifo_recover_tsg(g, id, true); |
@@ -1604,9 +1706,12 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) | |||
1604 | gk20a_err(dev_from_gk20a(g), | 1706 | gk20a_err(dev_from_gk20a(g), |
1605 | "preempt channel %d timeout\n", id); | 1707 | "preempt channel %d timeout\n", id); |
1606 | 1708 | ||
1607 | gk20a_set_error_notifier(ch, | 1709 | if (gk20a_channel_get(ch)) { |
1608 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); | 1710 | gk20a_set_error_notifier(ch, |
1609 | gk20a_fifo_recover_ch(g, id, true); | 1711 | NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); |
1712 | gk20a_fifo_recover_ch(g, id, true); | ||
1713 | gk20a_channel_put(ch); | ||
1714 | } | ||
1610 | } | 1715 | } |
1611 | } | 1716 | } |
1612 | 1717 | ||
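
The preempt wait above is the usual jiffies-bounded register poll (with an escape via !tegra_platform_is_silicon() so pre-silicon platforms wait indefinitely). A generic sketch of the loop's form, where example_done() is a hypothetical stand-in for the fifo_preempt_r() pending check:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/types.h>

/* Poll until 'example_done' reports completion or the deadline passes. */
static int example_poll_until(bool (*example_done)(void *), void *data,
                              unsigned int timeout_ms)
{
        unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout_ms);

        do {
                if (example_done(data))
                        return 0;
                usleep_range(20, 50);   /* brief back-off between reads */
        } while (time_before(jiffies, end_jiffies));

        return -EBUSY;
}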
@@ -1790,7 +1895,9 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) | |||
1790 | (f->engine_info[i].runlist_id == runlist_id)) | 1895 | (f->engine_info[i].runlist_id == runlist_id)) |
1791 | engines |= BIT(i); | 1896 | engines |= BIT(i); |
1792 | } | 1897 | } |
1793 | gk20a_fifo_recover(g, engines, true); | 1898 | |
1899 | if (engines) | ||
1900 | gk20a_fifo_recover(g, engines, ~(u32)0, false, true); | ||
1794 | } | 1901 | } |
1795 | 1902 | ||
1796 | static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) | 1903 | static int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) |
@@ -1994,6 +2101,8 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, | |||
1994 | u32 mutex_ret; | 2101 | u32 mutex_ret; |
1995 | u32 ret = 0; | 2102 | u32 ret = 0; |
1996 | 2103 | ||
2104 | gk20a_dbg_fn(""); | ||
2105 | |||
1997 | runlist = &f->runlist_info[runlist_id]; | 2106 | runlist = &f->runlist_info[runlist_id]; |
1998 | 2107 | ||
1999 | mutex_lock(&runlist->mutex); | 2108 | mutex_lock(&runlist->mutex); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index dd320ae1..fdf843d2 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A graphics fifo (gr host) | 4 | * GK20A graphics fifo (gr host) |
5 | * | 5 | * |
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -106,7 +106,9 @@ struct fifo_gk20a { | |||
106 | u32 userd_entry_size; | 106 | u32 userd_entry_size; |
107 | 107 | ||
108 | struct channel_gk20a *channel; | 108 | struct channel_gk20a *channel; |
109 | struct mutex ch_inuse_mutex; /* protect unused chid look up */ | 109 | /* zero-kref'd channels here */ |
110 | struct list_head free_chs; | ||
111 | struct mutex free_chs_mutex; | ||
110 | 112 | ||
111 | struct tsg_gk20a *tsg; | 113 | struct tsg_gk20a *tsg; |
112 | struct mutex tsg_inuse_mutex; | 114 | struct mutex tsg_inuse_mutex; |
@@ -130,7 +132,7 @@ struct fifo_gk20a { | |||
130 | 132 | ||
131 | } intr; | 133 | } intr; |
132 | 134 | ||
133 | u32 mmu_fault_engines; | 135 | u32 deferred_fault_engines; |
134 | bool deferred_reset_pending; | 136 | bool deferred_reset_pending; |
135 | struct mutex deferred_reset_mutex; | 137 | struct mutex deferred_reset_mutex; |
136 | }; | 138 | }; |
@@ -157,7 +159,12 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid, | |||
157 | int gk20a_fifo_suspend(struct gk20a *g); | 159 | int gk20a_fifo_suspend(struct gk20a *g); |
158 | 160 | ||
159 | bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); | 161 | bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); |
160 | void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); | 162 | |
163 | void gk20a_fifo_recover(struct gk20a *g, | ||
164 | u32 engine_ids, /* if zero, will be queried from HW */ | ||
165 | u32 hw_id, /* if ~0, will be queried from HW */ | ||
166 | bool hw_id_is_tsg, /* ignored if hw_id == ~0 */ | ||
167 | bool verbose); | ||
161 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); | 168 | void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); |
162 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); | 169 | void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); |
163 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); | 170 | int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); |
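
For reference, this is how the widened prototype reads at call sites; the first two forms appear in the hunks above, the third exercises the query-from-HW defaults described in the parameter comments. Illustrative only:

static void example_recover_calls(struct gk20a *g, u32 engine_id, u32 id,
                                  struct channel_gk20a *ch)
{
        /* victim channel known (sched-error ctxsw timeout path) */
        gk20a_fifo_recover(g, BIT(engine_id), id, false,
                           ch->timeout_debug_dump);

        /* faulted engine known, victim unknown: ~0 means query the HW */
        gk20a_fifo_recover(g, BIT(engine_id), ~(u32)0, false, true);

        /* nothing known up front: engines and victim both from HW */
        gk20a_fifo_recover(g, 0, ~(u32)0, false, true);
}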
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9c201f32..498de7e7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -1388,6 +1388,9 @@ static int gk20a_probe(struct platform_device *dev) | |||
1388 | return -ENOMEM; | 1388 | return -ENOMEM; |
1389 | } | 1389 | } |
1390 | 1390 | ||
1391 | init_waitqueue_head(&gk20a->sw_irq_stall_last_handled_wq); | ||
1392 | init_waitqueue_head(&gk20a->sw_irq_nonstall_last_handled_wq); | ||
1393 | |||
1391 | #ifdef CONFIG_PM_GENERIC_DOMAINS_OF | 1394 | #ifdef CONFIG_PM_GENERIC_DOMAINS_OF |
1392 | gk20a_domain = container_of(dev_to_genpd(&dev->dev), | 1395 | gk20a_domain = container_of(dev_to_genpd(&dev->dev), |
1393 | struct gk20a_domain_data, gpd); | 1396 | struct gk20a_domain_data, gpd); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a52d97f3..d8e3586f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -538,6 +538,15 @@ struct gk20a { | |||
538 | u32 max_ltc_count; | 538 | u32 max_ltc_count; |
539 | u32 ltc_count; | 539 | u32 ltc_count; |
540 | 540 | ||
541 | atomic_t hw_irq_stall_count; | ||
542 | atomic_t hw_irq_nonstall_count; | ||
543 | |||
544 | atomic_t sw_irq_stall_last_handled; | ||
545 | wait_queue_head_t sw_irq_stall_last_handled_wq; | ||
546 | |||
547 | atomic_t sw_irq_nonstall_last_handled; | ||
548 | wait_queue_head_t sw_irq_nonstall_last_handled_wq; | ||
549 | |||
541 | struct devfreq *devfreq; | 550 | struct devfreq *devfreq; |
542 | 551 | ||
543 | struct gk20a_scale_profile *scale_profile; | 552 | struct gk20a_scale_profile *scale_profile; |
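
Together with the mc_gk20a.c changes below, these counter/waitqueue pairs allow blocking until every interrupt raised so far has been fully handled by the threaded ISRs. A minimal sketch of such a barrier, assuming only the fields above; the helper name is hypothetical:

#include <linux/atomic.h>
#include <linux/wait.h>

static void example_wait_for_deferred_interrupts(struct gk20a *g)
{
        /* snapshot how many hard IRQs have fired so far */
        int stall_irqs = atomic_read(&g->hw_irq_stall_count);
        int nonstall_irqs = atomic_read(&g->hw_irq_nonstall_count);

        /* then wait until the threaded handlers have caught up */
        wait_event(g->sw_irq_stall_last_handled_wq,
                   atomic_read(&g->sw_irq_stall_last_handled) >= stall_irqs);
        wait_event(g->sw_irq_nonstall_last_handled_wq,
                   atomic_read(&g->sw_irq_nonstall_last_handled) >= nonstall_irqs);
}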
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index b2fea5b8..edd4c6c8 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g, | |||
5138 | * Also used by regops to translate current ctx to chid and tsgid. | 5138 | * Also used by regops to translate current ctx to chid and tsgid. |
5139 | * For performance, we don't want to go through 128 channels every time. | 5139 | * For performance, we don't want to go through 128 channels every time. |
5140 | * curr_ctx should be the value read from gr_fecs_current_ctx_r(). | 5140 | * curr_ctx should be the value read from gr_fecs_current_ctx_r(). |
5141 | * A small tlb is used here to cache translation */ | 5141 | * A small tlb is used here to cache translation. |
5142 | static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, | 5142 | * |
5143 | int *curr_tsgid) | 5143 | * Returned channel must be released with gk20a_channel_put() */ |
5144 | static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( | ||
5145 | struct gk20a *g, u32 curr_ctx, int *curr_tsgid) | ||
5144 | { | 5146 | { |
5145 | struct fifo_gk20a *f = &g->fifo; | 5147 | struct fifo_gk20a *f = &g->fifo; |
5146 | struct gr_gk20a *gr = &g->gr; | 5148 | struct gr_gk20a *gr = &g->gr; |
5147 | u32 chid = -1; | 5149 | u32 chid = -1; |
5148 | int tsgid = NVGPU_INVALID_TSG_ID; | 5150 | int tsgid = NVGPU_INVALID_TSG_ID; |
5149 | u32 i; | 5151 | u32 i; |
5152 | struct channel_gk20a *ret = NULL; | ||
5150 | 5153 | ||
5151 | /* when contexts are unloaded from GR, the valid bit is reset | 5154 | /* when contexts are unloaded from GR, the valid bit is reset |
5152 | * but the instance pointer information remains intact. So the | 5155 | * but the instance pointer information remains intact. So the |
5153 | * valid bit must be checked to be absolutely certain that a | 5156 | * valid bit must be checked to be absolutely certain that a |
5154 | * valid context is currently resident. */ | 5157 | * valid context is currently resident. */ |
5155 | if (!gr_fecs_current_ctx_valid_v(curr_ctx)) | 5158 | if (!gr_fecs_current_ctx_valid_v(curr_ctx)) |
5156 | return -1; | 5159 | return NULL; |
5157 | 5160 | ||
5158 | spin_lock(&gr->ch_tlb_lock); | 5161 | spin_lock(&gr->ch_tlb_lock); |
5159 | 5162 | ||
@@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, | |||
5162 | if (gr->chid_tlb[i].curr_ctx == curr_ctx) { | 5165 | if (gr->chid_tlb[i].curr_ctx == curr_ctx) { |
5163 | chid = gr->chid_tlb[i].hw_chid; | 5166 | chid = gr->chid_tlb[i].hw_chid; |
5164 | tsgid = gr->chid_tlb[i].tsgid; | 5167 | tsgid = gr->chid_tlb[i].tsgid; |
5168 | ret = gk20a_channel_get(&f->channel[chid]); | ||
5165 | goto unlock; | 5169 | goto unlock; |
5166 | } | 5170 | } |
5167 | } | 5171 | } |
5168 | 5172 | ||
5169 | /* slow path */ | 5173 | /* slow path */ |
5170 | for (chid = 0; chid < f->num_channels; chid++) | 5174 | for (chid = 0; chid < f->num_channels; chid++) { |
5171 | if (f->channel[chid].in_use) { | 5175 | struct channel_gk20a *ch = &f->channel[chid]; |
5172 | if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> | 5176 | if (!gk20a_channel_get(ch)) |
5173 | ram_in_base_shift_v()) == | 5177 | continue; |
5178 | |||
5179 | if ((u32)(gk20a_mem_phys(&ch->inst_block) >> | ||
5180 | ram_in_base_shift_v()) == | ||
5174 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { | 5181 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { |
5175 | tsgid = f->channel[chid].tsgid; | 5182 | tsgid = ch->tsgid; |
5176 | break; | 5183 | /* found it */ |
5177 | } | 5184 | ret = ch; |
5185 | break; | ||
5186 | } | ||
5187 | gk20a_channel_put(ch); | ||
5178 | } | 5188 | } |
5179 | 5189 | ||
5180 | if (chid >= f->num_channels) { | 5190 | if (!ret) |
5181 | chid = -1; | ||
5182 | goto unlock; | 5191 | goto unlock; |
5183 | } | ||
5184 | 5192 | ||
5185 | /* add to free tlb entry */ | 5193 | /* add to free tlb entry */ |
5186 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { | 5194 | for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { |
@@ -5205,7 +5213,7 @@ unlock: | |||
5205 | spin_unlock(&gr->ch_tlb_lock); | 5213 | spin_unlock(&gr->ch_tlb_lock); |
5206 | if (curr_tsgid) | 5214 | if (curr_tsgid) |
5207 | *curr_tsgid = tsgid; | 5215 | *curr_tsgid = tsgid; |
5208 | return chid; | 5216 | return ret; |
5209 | } | 5217 | } |
5210 | 5218 | ||
5211 | int gk20a_gr_lock_down_sm(struct gk20a *g, | 5219 | int gk20a_gr_lock_down_sm(struct gk20a *g, |
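
Callers of the reworked lookup now receive a referenced channel rather than a bare chid, and must drop that reference themselves. A usage sketch under that assumption:

static void example_query_resident_channel(struct gk20a *g)
{
        struct channel_gk20a *ch;
        int tsgid;
        u32 curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());

        ch = gk20a_gr_get_channel_from_ctx(g, curr_ctx, &tsgid);
        if (!ch)
                return;         /* no valid, referenceable context resident */

        /* ... inspect ch->hw_chid and tsgid here ... */

        gk20a_channel_put(ch);  /* mandatory: drop the lookup's reference */
}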
@@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5399 | u32 obj_table; | 5407 | u32 obj_table; |
5400 | int need_reset = 0; | 5408 | int need_reset = 0; |
5401 | u32 gr_intr = gk20a_readl(g, gr_intr_r()); | 5409 | u32 gr_intr = gk20a_readl(g, gr_intr_r()); |
5410 | struct channel_gk20a *ch = NULL; | ||
5402 | 5411 | ||
5403 | gk20a_dbg_fn(""); | 5412 | gk20a_dbg_fn(""); |
5404 | gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); | 5413 | gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); |
@@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5424 | gr_fe_object_table_r(isr_data.sub_chan)) : 0; | 5433 | gr_fe_object_table_r(isr_data.sub_chan)) : 0; |
5425 | isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); | 5434 | isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); |
5426 | 5435 | ||
5427 | isr_data.chid = | 5436 | ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL); |
5428 | gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL); | 5437 | if (!ch) { |
5429 | if (isr_data.chid == -1) { | ||
5430 | gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", | 5438 | gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", |
5431 | isr_data.curr_ctx); | 5439 | isr_data.curr_ctx); |
5432 | goto clean_up; | 5440 | goto clean_up; |
5433 | } | 5441 | } |
5442 | isr_data.chid = ch->hw_chid; | ||
5434 | 5443 | ||
5435 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5444 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5436 | "channel %d: addr 0x%08x, " | 5445 | "channel %d: addr 0x%08x, " |
@@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5512 | 5521 | ||
5513 | if (gr_intr & gr_intr_exception_pending_f()) { | 5522 | if (gr_intr & gr_intr_exception_pending_f()) { |
5514 | u32 exception = gk20a_readl(g, gr_exception_r()); | 5523 | u32 exception = gk20a_readl(g, gr_exception_r()); |
5515 | struct fifo_gk20a *f = &g->fifo; | ||
5516 | struct channel_gk20a *ch = &f->channel[isr_data.chid]; | ||
5517 | 5524 | ||
5518 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); | 5525 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); |
5519 | 5526 | ||
@@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g) | |||
5572 | } | 5579 | } |
5573 | 5580 | ||
5574 | if (need_reset) | 5581 | if (need_reset) |
5575 | gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true); | 5582 | gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), |
5583 | ~(u32)0, false, true); | ||
5576 | 5584 | ||
5577 | clean_up: | 5585 | clean_up: |
5586 | if (gr_intr && !ch) { | ||
5587 | /* Clear interrupts for unused channel. This is | ||
5588 | probably an interrupt during gk20a_free_channel() */ | ||
5589 | gk20a_err(dev_from_gk20a(g), | ||
5590 | "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing", | ||
5591 | gr_intr); | ||
5592 | gk20a_writel(g, gr_intr_r(), gr_intr); | ||
5593 | gr_intr = 0; | ||
5594 | } | ||
5595 | |||
5578 | gk20a_writel(g, gr_gpfifo_ctl_r(), | 5596 | gk20a_writel(g, gr_gpfifo_ctl_r(), |
5579 | grfifo_ctl | gr_gpfifo_ctl_access_f(1) | | 5597 | grfifo_ctl | gr_gpfifo_ctl_access_f(1) | |
5580 | gr_gpfifo_ctl_semaphore_access_f(1)); | 5598 | gr_gpfifo_ctl_semaphore_access_f(1)); |
@@ -5583,6 +5601,9 @@ clean_up: | |||
5583 | gk20a_err(dev_from_gk20a(g), | 5601 | gk20a_err(dev_from_gk20a(g), |
5584 | "unhandled gr interrupt 0x%08x", gr_intr); | 5602 | "unhandled gr interrupt 0x%08x", gr_intr); |
5585 | 5603 | ||
5604 | if (ch) | ||
5605 | gk20a_channel_put(ch); | ||
5606 | |||
5586 | return 0; | 5607 | return 0; |
5587 | } | 5608 | } |
5588 | 5609 | ||
@@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
6670 | 6691 | ||
6671 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) | 6692 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) |
6672 | { | 6693 | { |
6673 | int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid; | 6694 | int curr_gr_ctx, curr_gr_tsgid; |
6674 | struct gk20a *g = ch->g; | 6695 | struct gk20a *g = ch->g; |
6696 | struct channel_gk20a *curr_ch; | ||
6697 | bool ret = false; | ||
6675 | 6698 | ||
6676 | curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); | 6699 | curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); |
6677 | curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx, | 6700 | curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx, |
6678 | &curr_gr_tsgid); | 6701 | &curr_gr_tsgid); |
6679 | 6702 | ||
6680 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | 6703 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, |
6681 | "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" | 6704 | "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" |
6682 | " ch->hw_chid=%d", curr_gr_chid, | 6705 | " ch->hw_chid=%d", |
6683 | curr_gr_tsgid, ch->tsgid, ch->hw_chid); | 6706 | curr_ch ? curr_ch->hw_chid : -1, |
6684 | 6707 | curr_gr_tsgid, | |
6685 | if (curr_gr_chid == -1) | 6708 | ch->tsgid, |
6709 | ch->hw_chid); | ||
6710 | |||
6711 | if (!curr_ch) | ||
6686 | return false; | 6712 | return false; |
6687 | 6713 | ||
6688 | if (ch->hw_chid == curr_gr_chid) | 6714 | if (ch->hw_chid == curr_ch->hw_chid) |
6689 | return true; | 6715 | ret = true; |
6690 | 6716 | ||
6691 | if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) | 6717 | if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) |
6692 | return true; | 6718 | ret = true; |
6693 | 6719 | ||
6694 | return false; | 6720 | gk20a_channel_put(curr_ch); |
6721 | return ret; | ||
6695 | } | 6722 | } |
6696 | 6723 | ||
6697 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | 6724 | int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, |
diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 06b00a25..0a773d10 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c | |||
@@ -40,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g) | |||
40 | /* flush previous write */ | 40 | /* flush previous write */ |
41 | gk20a_readl(g, mc_intr_en_0_r()); | 41 | gk20a_readl(g, mc_intr_en_0_r()); |
42 | 42 | ||
43 | atomic_inc(&g->hw_irq_stall_count); | ||
44 | |||
43 | trace_mc_gk20a_intr_stall_done(g->dev->name); | 45 | trace_mc_gk20a_intr_stall_done(g->dev->name); |
44 | 46 | ||
45 | return IRQ_WAKE_THREAD; | 47 | return IRQ_WAKE_THREAD; |
@@ -63,18 +65,22 @@ irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g) | |||
63 | /* flush previous write */ | 65 | /* flush previous write */ |
64 | gk20a_readl(g, mc_intr_en_1_r()); | 66 | gk20a_readl(g, mc_intr_en_1_r()); |
65 | 67 | ||
68 | atomic_inc(&g->hw_irq_nonstall_count); | ||
69 | |||
66 | return IRQ_WAKE_THREAD; | 70 | return IRQ_WAKE_THREAD; |
67 | } | 71 | } |
68 | 72 | ||
69 | irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) | 73 | irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) |
70 | { | 74 | { |
71 | u32 mc_intr_0; | 75 | u32 mc_intr_0; |
76 | int hw_irq_count; | ||
72 | 77 | ||
73 | gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); | 78 | gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); |
74 | 79 | ||
75 | trace_mc_gk20a_intr_thread_stall(g->dev->name); | 80 | trace_mc_gk20a_intr_thread_stall(g->dev->name); |
76 | 81 | ||
77 | mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); | 82 | mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); |
83 | hw_irq_count = atomic_read(&g->hw_irq_stall_count); | ||
78 | 84 | ||
79 | gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); | 85 | gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); |
80 | 86 | ||
@@ -94,12 +100,17 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) | |||
94 | if (mc_intr_0 & mc_intr_0_pbus_pending_f()) | 100 | if (mc_intr_0 & mc_intr_0_pbus_pending_f()) |
95 | gk20a_pbus_isr(g); | 101 | gk20a_pbus_isr(g); |
96 | 102 | ||
103 | /* sync handled irq counter before re-enabling interrupts */ | ||
104 | atomic_set(&g->sw_irq_stall_last_handled, hw_irq_count); | ||
105 | |||
97 | gk20a_writel(g, mc_intr_en_0_r(), | 106 | gk20a_writel(g, mc_intr_en_0_r(), |
98 | mc_intr_en_0_inta_hardware_f()); | 107 | mc_intr_en_0_inta_hardware_f()); |
99 | 108 | ||
100 | /* flush previous write */ | 109 | /* flush previous write */ |
101 | gk20a_readl(g, mc_intr_en_0_r()); | 110 | gk20a_readl(g, mc_intr_en_0_r()); |
102 | 111 | ||
112 | wake_up_all(&g->sw_irq_stall_last_handled_wq); | ||
113 | |||
103 | trace_mc_gk20a_intr_thread_stall_done(g->dev->name); | 114 | trace_mc_gk20a_intr_thread_stall_done(g->dev->name); |
104 | 115 | ||
105 | return IRQ_HANDLED; | 116 | return IRQ_HANDLED; |
@@ -108,10 +119,12 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) | |||
108 | irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) | 119 | irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) |
109 | { | 120 | { |
110 | u32 mc_intr_1; | 121 | u32 mc_intr_1; |
122 | int hw_irq_count; | ||
111 | 123 | ||
112 | gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); | 124 | gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); |
113 | 125 | ||
114 | mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); | 126 | mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); |
127 | hw_irq_count = atomic_read(&g->hw_irq_nonstall_count); | ||
115 | 128 | ||
116 | gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); | 129 | gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); |
117 | 130 | ||
@@ -125,12 +138,17 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) | |||
125 | && g->ops.ce2.isr_nonstall) | 138 | && g->ops.ce2.isr_nonstall) |
126 | g->ops.ce2.isr_nonstall(g); | 139 | g->ops.ce2.isr_nonstall(g); |
127 | 140 | ||
141 | /* sync handled irq counter before re-enabling interrupts */ | ||
142 | atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count); | ||
143 | |||
128 | gk20a_writel(g, mc_intr_en_1_r(), | 144 | gk20a_writel(g, mc_intr_en_1_r(), |
129 | mc_intr_en_1_inta_hardware_f()); | 145 | mc_intr_en_1_inta_hardware_f()); |
130 | 146 | ||
131 | /* flush previous write */ | 147 | /* flush previous write */ |
132 | gk20a_readl(g, mc_intr_en_1_r()); | 148 | gk20a_readl(g, mc_intr_en_1_r()); |
133 | 149 | ||
150 | wake_up_all(&g->sw_irq_nonstall_last_handled_wq); | ||
151 | |||
134 | return IRQ_HANDLED; | 152 | return IRQ_HANDLED; |
135 | } | 153 | } |
136 | 154 | ||
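
The ordering in the two handlers above is what makes the handled-IRQ counters trustworthy: the hard ISR bumps the hardware count before waking the thread, and the thread snapshots that count before reading the interrupt status, so the published last-handled value never runs ahead of work actually done. A condensed sketch of the pairing (stall side only, hypothetical function names):

#include <linux/atomic.h>
#include <linux/interrupt.h>

static irqreturn_t example_hard_isr(struct gk20a *g)
{
        atomic_inc(&g->hw_irq_stall_count);     /* count before deferring */
        return IRQ_WAKE_THREAD;
}

static irqreturn_t example_threaded_isr(struct gk20a *g)
{
        int seen = atomic_read(&g->hw_irq_stall_count); /* snapshot early */

        /* ... read mc_intr_0_r() and dispatch per-unit handlers ... */

        atomic_set(&g->sw_irq_stall_last_handled, seen); /* publish */
        wake_up_all(&g->sw_irq_stall_last_handled_wq);
        return IRQ_HANDLED;
}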
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 68a31eca..23ff8677 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -283,6 +283,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
283 | 283 | ||
284 | init_runlist(g, f); | 284 | init_runlist(g, f); |
285 | 285 | ||
286 | INIT_LIST_HEAD(&f->free_chs); | ||
287 | mutex_init(&f->free_chs_mutex); | ||
288 | |||
286 | for (chid = 0; chid < f->num_channels; chid++) { | 289 | for (chid = 0; chid < f->num_channels; chid++) { |
287 | f->channel[chid].userd_cpu_va = | 290 | f->channel[chid].userd_cpu_va = |
288 | f->userd.cpu_va + chid * f->userd_entry_size; | 291 | f->userd.cpu_va + chid * f->userd_entry_size; |
@@ -294,7 +297,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
294 | 297 | ||
295 | gk20a_init_channel_support(g, chid); | 298 | gk20a_init_channel_support(g, chid); |
296 | } | 299 | } |
297 | mutex_init(&f->ch_inuse_mutex); | ||
298 | 300 | ||
299 | f->deferred_reset_pending = false; | 301 | f->deferred_reset_pending = false; |
300 | mutex_init(&f->deferred_reset_mutex); | 302 | mutex_init(&f->deferred_reset_mutex); |
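
With ch_inuse_mutex gone, unreferenced channels are expected to sit on f->free_chs, and an allocator pops them under free_chs_mutex. A hypothetical sketch of that allocator; the free_chs list member on channel_gk20a is an assumption here, since its declaration is not part of these hunks:

#include <linux/list.h>
#include <linux/mutex.h>

static struct channel_gk20a *example_alloc_channel(struct fifo_gk20a *f)
{
        struct channel_gk20a *ch = NULL;

        mutex_lock(&f->free_chs_mutex);
        if (!list_empty(&f->free_chs)) {
                /* 'free_chs' as the channel's list member is assumed */
                ch = list_first_entry(&f->free_chs,
                                      struct channel_gk20a, free_chs);
                list_del(&ch->free_chs);
        }
        mutex_unlock(&f->free_chs_mutex);

        return ch;      /* NULL when no channels are available */
}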