author     Konsta Holtta <kholtta@nvidia.com>        2014-10-23 07:10:57 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>     2015-03-18 15:11:58 -0400
commit     14577a339ccc160ed58f8d936ebcbd96dba3b6ca (patch)
tree       2e55969ea66a15b23f799e0054d4cba4ab6d85fe /drivers/gpu/nvgpu
parent     b1088fe769ea900438a39c9e9920157b4ba7436a (diff)
gpu: nvgpu: cde: list for contexts, defer deletion
Instead of the current preallocated array plus dynamically allocated
temporary contexts, use a linked list kept in LRU order, with free
contexts always stored at the beginning of the list. Put the
preallocated contexts on the list at init time and store dynamically
allocated temporaries there too for quick reuse as needed, deleting
temporaries via delayed scheduled work once the high load has
diminished.
Bug 200040211
Change-Id: Ibc75a0150109ec9c44b2eeb74607450990584b18
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/562856
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
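Note: the scheme the message describes boils down to a small LRU discipline over a kernel list. Below is a minimal, self-contained sketch of that discipline, not the driver code itself: the names are shortened for illustration, and only the list/in_use fields and the head/tail moves mirror what the patch does.

#include <linux/list.h>

struct ctx {
        struct list_head list;  /* links into the app-wide LRU list */
        bool in_use;
};

static LIST_HEAD(ctx_lru);      /* free entries at the head, busy at the tail */

/* Take the least-recently-used context (pool assumed non-empty),
 * mirroring the fast path of gk20a_cde_get_context() */
static struct ctx *ctx_acquire(void)
{
        struct ctx *c = list_first_entry(&ctx_lru, struct ctx, list);

        if (c->in_use)
                return NULL;    /* busy head => every context is busy */
        c->in_use = true;
        list_move_tail(&c->list, &ctx_lru);     /* busy entries go to the tail */
        return c;
}

/* Return a context to the free end, mirroring gk20a_ctx_release() */
static void ctx_release(struct ctx *c)
{
        c->in_use = false;
        list_move(&c->list, &ctx_lru);  /* back to the head for quick reuse */
}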
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c  300
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.h   13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h        1
3 files changed, 237 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ee62f02a..9067aae5 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -34,7 +34,10 @@
 #include "hw_ccsr_gk20a.h"
 #include "hw_pbdma_gk20a.h"
 
-static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx, bool free_after_use);
+static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
+static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
+
+#define CTX_DELETE_TIME 1000
 
 static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 {
@@ -67,7 +70,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 	cde_ctx->init_cmd_executed = false;
 }
 
-static int gk20a_cde_remove(struct gk20a_cde_ctx *cde_ctx)
+static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
 {
 	struct gk20a *g = cde_ctx->g;
 	struct channel_gk20a *ch = cde_ctx->ch;
@@ -81,23 +84,90 @@ static int gk20a_cde_remove(struct gk20a_cde_ctx *cde_ctx)
 	gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr,
 			 g->gr.compbit_store.size, 1);
 
-	return 0;
+	/* housekeeping on app */
+	list_del(&cde_ctx->list);
+	cde_ctx->g->cde_app.lru_len--;
+	kfree(cde_ctx);
+}
+
+static void gk20a_cde_prepare_ctx_remove(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+
+	/* permanent contexts do not have deleter works */
+	if (!cde_ctx->is_temporary)
+		return;
+
+	/* safe to go off the mutex since app is deinitialised. deleter works
+	 * may be only at waiting for the mutex or before, going to abort */
+	mutex_unlock(&cde_app->mutex);
+
+	/* the deleter can rearm itself */
+	do {
+		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
+	} while (delayed_work_pending(&cde_ctx->ctx_deleter_work));
+
+	mutex_lock(&cde_app->mutex);
 }
 
-int gk20a_cde_destroy(struct gk20a *g)
+static void gk20a_cde_deallocate_contexts(struct gk20a *g)
 {
 	struct gk20a_cde_app *cde_app = &g->cde_app;
-	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
-	int ret, i;
+	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
 
-	if (!cde_app->initialised)
-		return 0;
+	list_for_each_entry_safe(cde_ctx, cde_ctx_save,
+			&cde_app->cde_ctx_lru, list) {
+		gk20a_cde_prepare_ctx_remove(cde_ctx);
+		gk20a_cde_remove_ctx(cde_ctx);
+	}
+}
 
-	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++)
-		ret = gk20a_cde_remove(cde_ctx);
+void gk20a_cde_stop(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
 
+	/* prevent further conversions and delayed works from working */
 	cde_app->initialised = false;
-	return ret;
+	/* free all data, empty the list */
+	gk20a_cde_deallocate_contexts(g);
+}
+
+void gk20a_cde_destroy(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+
+	if (!cde_app->initialised)
+		return;
+
+	mutex_lock(&cde_app->mutex);
+	gk20a_cde_stop(g);
+	mutex_unlock(&cde_app->mutex);
+}
+
+static int gk20a_cde_allocate_contexts(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_ctx *cde_ctx;
+	int err = 0;
+	int i;
+
+	for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
+		cde_ctx = gk20a_cde_allocate_context(g);
+		if (IS_ERR(cde_ctx)) {
+			err = PTR_ERR(cde_ctx);
+			goto out;
+		}
+
+		list_add(&cde_ctx->list, &cde_app->cde_ctx_lru);
+		cde_app->lru_len++;
+		if (cde_app->lru_len > cde_app->lru_max_len)
+			cde_app->lru_max_len = cde_app->lru_len;
+	}
+
+	return 0;
+out:
+	gk20a_cde_deallocate_contexts(g);
+	return err;
 }
 
 static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
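Note: the cancel loop in gk20a_cde_prepare_ctx_remove() above exists because the deleter work can reschedule itself (gk20a_cde_ctx_deleter_fn() in the next hunk calls schedule_delayed_work() on its own item when gk20a_busy() fails). A single cancel_delayed_work_sync() can therefore complete while a freshly rearmed instance is already pending again, so the patch loops until nothing is pending. A simplified, self-contained sketch of the pattern; the stop_requested flag plays the role the patch gives to cde_app->initialised and in_use:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work deleter_work;
static bool stop_requested;     /* set before tear-down; checked by the handler */

static void deleter_fn(struct work_struct *work)
{
        if (stop_requested)
                return;         /* tear-down in progress: do not rearm */
        /* ... try to delete; on a transient failure, rearm: */
        schedule_delayed_work(&deleter_work, msecs_to_jiffies(1000));
}

static void teardown(void)
{
        stop_requested = true;
        /* One sync cancel may race with a rearm queued just before
         * stop_requested was observed, so loop until the queue is quiet. */
        do {
                cancel_delayed_work_sync(&deleter_work);
        } while (delayed_work_pending(&deleter_work));
}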
@@ -591,29 +661,117 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
 			num_entries, flags, fence, fence_out);
 }
 
+static void gk20a_ctx_release(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+
+	gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
+
+	mutex_lock(&cde_app->mutex);
+
+	cde_ctx->in_use = false;
+	list_move(&cde_ctx->list, &cde_app->cde_ctx_lru);
+	cde_app->lru_used--;
+
+	mutex_unlock(&cde_app->mutex);
+}
+
+static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
+{
+	struct delayed_work *delay_work = to_delayed_work(work);
+	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
+			struct gk20a_cde_ctx, ctx_deleter_work);
+	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+	struct platform_device *pdev = cde_ctx->pdev;
+	int err;
+
+	/* someone has just taken it? engine deletion started? */
+	if (cde_ctx->in_use || !cde_app->initialised)
+		return;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
+			"cde: attempting to delete temporary %p", cde_ctx);
+
+	/* this should fail only when shutting down the whole device */
+	err = gk20a_busy(pdev);
+	if (WARN(err, "gk20a cde: cannot set gk20a on, not freeing channel yet."
+			" rescheduling...")) {
+		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
+			msecs_to_jiffies(CTX_DELETE_TIME));
+		return;
+	}
+
+	/* mark so that nobody else assumes it's free to take */
+	mutex_lock(&cde_app->mutex);
+	if (cde_ctx->in_use || !cde_app->initialised) {
+		gk20a_dbg(gpu_dbg_cde_ctx,
+				"cde: context use raced, not deleting %p",
+				cde_ctx);
+		goto out;
+	}
+	cde_ctx->in_use = true;
+
+	gk20a_cde_remove_ctx(cde_ctx);
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
+			"cde: destroyed %p len=%d use=%d max=%d",
+			cde_ctx, cde_app->lru_len, cde_app->lru_used,
+			cde_app->lru_max_len);
+
+out:
+	mutex_unlock(&cde_app->mutex);
+	gk20a_idle(pdev);
+}
+
 static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
 {
 	struct gk20a_cde_app *cde_app = &g->cde_app;
-	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
-	int i, ret;
+	struct gk20a_cde_ctx *cde_ctx;
 
-	/* try to find a jobless context */
+	/* try to get a jobless context. list is in lru order */
+
+	cde_ctx = list_first_entry(&cde_app->cde_ctx_lru,
+			struct gk20a_cde_ctx, list);
+
+	if (!cde_ctx->in_use) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
+				"cde: got free %p len=%d use=%d max=%d",
+				cde_ctx, cde_app->lru_len, cde_app->lru_used,
+				cde_app->lru_max_len);
+		/* deleter work may be scheduled, but in_use prevents it */
+		cde_ctx->in_use = true;
+		list_move_tail(&cde_ctx->list, &cde_app->cde_ctx_lru);
+		cde_app->lru_used++;
+		return cde_ctx;
+	}
 
-	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) {
-		struct channel_gk20a *ch = cde_ctx->ch;
-		bool empty;
+	/* no free contexts, get a temporary one */
 
-		mutex_lock(&ch->jobs_lock);
-		empty = list_empty(&ch->jobs);
-		mutex_unlock(&ch->jobs_lock);
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
+			"cde: no free contexts, list len=%d",
+			cde_app->lru_len);
 
-		if (empty)
-			return cde_ctx;
+	cde_ctx = gk20a_cde_allocate_context(g);
+	if (IS_ERR(cde_ctx)) {
+		gk20a_warn(&g->dev->dev, "cde: cannot allocate context: %ld",
+				PTR_ERR(cde_ctx));
+		return cde_ctx;
 	}
 
-	/* could not find a free one, so allocate dynamically */
+	cde_ctx->in_use = true;
+	cde_ctx->is_temporary = true;
+	list_add_tail(&cde_ctx->list, &cde_app->cde_ctx_lru);
+	cde_app->lru_used++;
+	cde_app->lru_len++;
+	if (cde_app->lru_len > cde_app->lru_max_len)
+		cde_app->lru_max_len = cde_app->lru_len;
+
+	return cde_ctx;
+}
 
-	gk20a_warn(&g->dev->dev, "cde: no free contexts, allocating temporary");
+static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
+{
+	struct gk20a_cde_ctx *cde_ctx;
+	int ret;
 
 	cde_ctx = kzalloc(sizeof(*cde_ctx), GFP_KERNEL);
 	if (!cde_ctx)
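Note: in the new gk20a_cde_get_context() above, a single list_first_entry() check suffices because releases put free contexts back at the head and acquisitions move busy ones to the tail, so a busy head implies every context is busy; all in-use contexts always form a suffix of the list. A hypothetical sanity check of that invariant, written against the structures this patch adds but not present in the patch itself:

/* Hypothetical: verify that no free context follows an in-use one.
 * Caller must hold cde_app->mutex. Not part of the patch. */
static void cde_check_lru(struct gk20a_cde_app *cde_app)
{
        struct gk20a_cde_ctx *cde_ctx;
        bool seen_used = false;

        list_for_each_entry(cde_ctx, &cde_app->cde_ctx_lru, list) {
                if (cde_ctx->in_use)
                        seen_used = true;
                else
                        /* a free context after a used one breaks the order */
                        WARN_ON(seen_used);
        }
}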
@@ -622,12 +780,19 @@ static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
 	cde_ctx->g = g;
 	cde_ctx->pdev = g->dev;
 
-	ret = gk20a_cde_load(cde_ctx, true);
+	ret = gk20a_cde_load(cde_ctx);
 	if (ret) {
-		gk20a_err(&g->dev->dev, "cde: cde load failed on temporary");
+		kfree(cde_ctx);
 		return ERR_PTR(ret);
 	}
 
+	INIT_LIST_HEAD(&cde_ctx->list);
+	cde_ctx->is_temporary = false;
+	cde_ctx->in_use = false;
+	INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
+			gk20a_cde_ctx_deleter_fn);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
 	return cde_ctx;
 }
 
@@ -653,8 +818,10 @@ int gk20a_cde_convert(struct gk20a *g,
 	mutex_lock(&cde_app->mutex);
 
 	cde_ctx = gk20a_cde_get_context(g);
-	if (IS_ERR(cde_ctx))
-		return PTR_ERR(cde_ctx);
+	if (IS_ERR(cde_ctx)) {
+		err = PTR_ERR(cde_ctx);
+		goto exit_unlock;
+	}
 
 	/* First, map the buffers to local va */
 
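Note: this hunk also fixes a locking bug. The old code returned straight out of gk20a_cde_convert() with cde_app->mutex still held when no context could be had; the new goto keeps a single exit path that always drops the lock. A schematic of the single-exit pattern the fix adopts (struct app, struct ctx and get_context() are placeholders, not driver API):

static int convert(struct app *app)
{
        struct ctx *ctx;
        int err = 0;

        mutex_lock(&app->mutex);

        ctx = get_context(app);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
                goto exit_unlock;       /* old code returned here, lock still held */
        }

        /* ... perform the conversion using ctx ... */

exit_unlock:
        mutex_unlock(&app->mutex);
        return err;
}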
@@ -665,7 +832,7 @@ int gk20a_cde_convert(struct gk20a *g,
 
 	/* map the destination buffer */
 	get_dma_buf(dst); /* a ref for gk20a_vm_map */
-	dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0,
+	dst_vaddr = gk20a_vm_map(cde_ctx->vm, dst, 0,
 				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				 dst_kind, NULL, true,
 				 gk20a_mem_flag_none,
@@ -757,18 +924,17 @@ exit_unlock:
 
 	/* unmap the buffers - channel holds references to them now */
 	if (dst_vaddr)
-		gk20a_vm_unmap(g->cde_app.vm, dst_vaddr);
+		gk20a_vm_unmap(cde_ctx->vm, dst_vaddr);
 
 	mutex_unlock(&cde_app->mutex);
 
 	return err;
 }
 
-static void gk20a_free_ctx_cb(struct channel_gk20a *ch, void *data)
+static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
 {
 	struct gk20a_cde_ctx *cde_ctx = data;
 	bool empty;
-	int err;
 
 	mutex_lock(&ch->jobs_lock);
 	empty = list_empty(&ch->jobs);
@@ -777,19 +943,17 @@ static void gk20a_free_ctx_cb(struct channel_gk20a *ch, void *data)
 	if (!empty)
 		return;
 
-	/* this should fail only when shutting down the whole device */
-	err = gk20a_busy(cde_ctx->pdev);
-	if (WARN(err, "gk20a cde: cannot set gk20a on, not freeing channel"
-		 ", leaking memory"))
-		return;
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
 
-	gk20a_cde_remove(cde_ctx);
-	gk20a_idle(cde_ctx->pdev);
+	/* delete temporary contexts later */
+	if (cde_ctx->is_temporary)
+		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
+			msecs_to_jiffies(CTX_DELETE_TIME));
 
-	kfree(cde_ctx);
+	gk20a_ctx_release(cde_ctx);
 }
 
-static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx, bool free_after_use)
+static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 {
 	struct gk20a *g = cde_ctx->g;
 	const struct firmware *img;
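Note: the completion callback above is where the deferred deletion from the commit message starts. When a temporary context's channel runs out of jobs, the context is only scheduled for deletion CTX_DELETE_TIME (1000 ms) later and is immediately released back to the LRU head, so a conversion arriving inside that window reuses it and the deleter's in_use check backs off. A condensed life cycle of a temporary context under this patch:

/*
 *   gk20a_cde_get_context()        no free ctx -> allocate a temporary
 *   ... conversion runs ...
 *   gk20a_cde_finished_ctx_cb()    jobs empty -> schedule deleter in 1 s;
 *                                  gk20a_ctx_release() puts ctx at LRU head
 *   [within 1 s]   next conversion reuses the temporary; in_use = true,
 *                                  so the deleter finds it busy and returns
 *   [after 1 s idle]               gk20a_cde_ctx_deleter_fn() takes the app
 *                                  mutex, rechecks in_use, removes the ctx
 */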
@@ -804,10 +968,8 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx, bool free_after_use)
 		return -ENOSYS;
 	}
 
-	if (free_after_use)
-		ch = gk20a_open_new_channel_with_cb(g, gk20a_free_ctx_cb, cde_ctx);
-	else
-		ch = gk20a_open_new_channel(g);
+	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
+			cde_ctx);
 	if (!ch) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: gk20a channel not available");
 		err = -ENOMEM;
@@ -876,8 +1038,7 @@ err_get_gk20a_channel:
 int gk20a_cde_reload(struct gk20a *g)
 {
 	struct gk20a_cde_app *cde_app = &g->cde_app;
-	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
-	int err, i;
+	int err;
 
 	if (!cde_app->initialised)
 		return -ENOSYS;
@@ -887,10 +1048,12 @@
 		return err;
 
 	mutex_lock(&cde_app->mutex);
-	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) {
-		gk20a_cde_remove(cde_ctx);
-		err = gk20a_cde_load(cde_ctx, false);
-	}
+
+	gk20a_cde_stop(g);
+
+	err = gk20a_cde_allocate_contexts(g);
+	if (!err)
+		cde_app->initialised = true;
 
 	mutex_unlock(&cde_app->mutex);
 
@@ -901,39 +1064,28 @@ int gk20a_cde_reload(struct gk20a *g)
 int gk20a_init_cde_support(struct gk20a *g)
 {
 	struct gk20a_cde_app *cde_app = &g->cde_app;
-	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
-	int ret, i;
+	int err;
 
 	if (cde_app->initialised)
 		return 0;
 
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
+
 	mutex_init(&cde_app->mutex);
 	mutex_lock(&cde_app->mutex);
 
-	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) {
-		cde_ctx->g = g;
-		cde_ctx->pdev = g->dev;
-		ret = gk20a_cde_load(cde_ctx, false);
-		if (ret)
-			goto err_init_instance;
-	}
+	INIT_LIST_HEAD(&cde_app->cde_ctx_lru);
+	cde_app->lru_len = 0;
+	cde_app->lru_max_len = 0;
+	cde_app->lru_used = 0;
 
-	/* take shadow to the vm for general usage */
-	cde_app->vm = cde_app->cde_ctx->vm;
+	err = gk20a_cde_allocate_contexts(g);
+	if (!err)
+		cde_app->initialised = true;
 
-	cde_app->initialised = true;
 	mutex_unlock(&cde_app->mutex);
-
-	return 0;
-
-err_init_instance:
-
-	/* deinitialise initialised channels */
-	while (i--) {
-		gk20a_cde_remove(cde_ctx);
-		cde_ctx--;
-	}
-	return ret;
+	gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
+	return err;
 }
 
 enum cde_launch_patch_offset {
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index e4d4659d..4120dc94 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -242,19 +242,26 @@ struct gk20a_cde_ctx {
 	struct kobj_attribute attr;
 
 	bool init_cmd_executed;
+
+	struct list_head list;
+	bool is_temporary;
+	bool in_use;
+	struct delayed_work ctx_deleter_work;
 };
 
 struct gk20a_cde_app {
 	bool initialised;
 	struct mutex mutex;
-	struct vm_gk20a *vm;
 
-	struct gk20a_cde_ctx cde_ctx[NUM_CDE_CONTEXTS];
+	struct list_head cde_ctx_lru;
+	int lru_len;
+	int lru_max_len;
+	int lru_used;
 
 	u32 shader_parameter;
 };
 
-int gk20a_cde_destroy(struct gk20a *g);
+void gk20a_cde_destroy(struct gk20a *g);
 int gk20a_init_cde_support(struct gk20a *g);
 int gk20a_cde_reload(struct gk20a *g);
 int gk20a_cde_convert(struct gk20a *g, struct dma_buf *dst,
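Note: the bookkeeping added to gk20a_cde_app is deliberately simple: lru_len counts contexts currently on the list, lru_used counts those marked in_use, and lru_max_len is a high-water mark, so 0 <= lru_used <= lru_len <= lru_max_len should hold whenever the app mutex is held. A hypothetical assertion of those relationships, not present in the patch:

/* Hypothetical: counter relationships maintained under cde_app->mutex. */
static void cde_check_counters(struct gk20a_cde_app *cde_app)
{
        WARN_ON(cde_app->lru_used < 0);
        WARN_ON(cde_app->lru_used > cde_app->lru_len);
        WARN_ON(cde_app->lru_len > cde_app->lru_max_len);
}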
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 309a1b08..825cb886 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -535,6 +535,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_map = BIT(8), /* mem mappings */
 	gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
 	gpu_dbg_cde = BIT(10), /* cde info messages */
+	gpu_dbg_cde_ctx = BIT(11), /* cde context usage messages */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 
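Note: these debug categories form a bitmask and are or-ed together at gk20a_dbg() call sites; the new gpu_dbg_cde_ctx bit is typically combined with gpu_dbg_fn throughout this patch, for example:

gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);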