Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ce2.c | 187
1 file changed, 187 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
new file mode 100644
index 00000000..f172cede
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -0,0 +1,187 @@
/*
 * Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <uapi/linux/nvgpu.h>

#include <nvgpu/types.h>

#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

#include "gk20a/ce2_gk20a.h"
#include "gk20a/gk20a.h"
#include "channel.h"

static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
{
	/*
	 * If no local memory (vidmem) is available, don't allow
	 * local-memory-related CE flags.
	 */
	if (!g->mm.vidmem.size) {
		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
			NVGPU_CE_DST_LOCATION_LOCAL_FB);
	}
	return launch_flags;
}

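/*
 * Submit a copy engine operation on the CE context identified by ce_ctx_id.
 * The request (src_buf, dst_buf, size, payload, launch_flags,
 * request_operation) is encoded into the context's command buffer and
 * submitted on the context's channel. An optional pre-fence may be passed in
 * via gk20a_fence_in; the post-fence of the submission is returned through
 * gk20a_fence_out when requested.
 */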
int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		struct gk20a_fence *gk20a_fence_in,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 fence_index;
	u32 dma_copy_class;
	struct nvgpu_gpfifo gpfifo;
	struct nvgpu_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

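	/* look up the CE context matching ce_ctx_id in the app's list of
	 * allocated contexts */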
	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

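	/* the command buffer is consumed as a ring of fixed-size kickoff
	 * slots; wrap the read offset back to the start of the queue once it
	 * reaches the end */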
	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;

	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));

	/* the end of each kickoff slot holds a gk20a_fence pointer used for
	 * command buffer synchronization */
	fence_index = (cmd_buf_read_offset +
			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));

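	/* the reserved tracing area at the end of the slot must be large
	 * enough to hold a struct gk20a_fence pointer */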
	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
		ret = -ENOMEM;
		goto noop;
	}

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	/*
	 * A non-zero value here is the post-fence pointer stored by the
	 * previous submission that used this slot; wait for it before the
	 * slot is reused. 0 is treated as an invalid (absent) pre-sync.
	 */
	if (cmd_buf_cpu_va[fence_index]) {
		struct gk20a_fence *ce_cmd_buf_fence_in = NULL;

		memcpy((void *)&ce_cmd_buf_fence_in,
				(void *)(cmd_buf_cpu_va + fence_index),
				sizeof(struct gk20a_fence *));
		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(ce_cmd_buf_fence_in);
		/* reset the stored pre-sync fence before reusing the slot */
		memset((void *)(cmd_buf_cpu_va + fence_index),
				0,
				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
		if (ret)
			goto noop;
	}

	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va +
			(u64)(cmd_buf_read_offset * sizeof(u32)));

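	/* encode the CE request methods into this kickoff slot; a return
	 * value of 0 means the methods could not be prepared */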
	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class,
			gk20a_fence_in);

	if (methodSize) {
		/* TODO: remove the CPU pre-fence wait */
		if (gk20a_fence_in) {
			ret = gk20a_fence_wait(g, gk20a_fence_in,
					gk20a_get_gr_idle_timeout(g));
			gk20a_fence_put(gk20a_fence_in);
			if (ret)
				goto noop;
		}

		/* build the gpfifo entry: low 32 bits of the command buffer
		 * GPU VA in entry0, high bits and the method count in entry1 */
		gpfifo.entry0 = u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 = (u64_hi32(cmd_buf_gpu_va) |
				pbdma_gp_entry1_length_f(methodSize));

		/* always request the post-fence; it is needed to protect the
		 * CE context */
		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

		nvgpu_smp_wmb();

		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
				1, submit_flags, &fence,
				&ce_cmd_buf_fence_out, false, NULL);

		if (!ret) {
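			/* remember this submission's post-fence at the end of
			 * the slot so the next reuse of the slot can wait on it */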
			memcpy((void *)(cmd_buf_cpu_va + fence_index),
					(void *)&ce_cmd_buf_fence_out,
					sizeof(struct gk20a_fence *));

			if (gk20a_fence_out) {
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* advance to the next available command buffer queue index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}