Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ce2.c  185
1 file changed, 185 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
new file mode 100644
index 00000000..3fee23e5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -0,0 +1,185 @@
/*
 * Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/types.h>

#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

#include "gk20a/ce2_gk20a.h"
#include "gk20a/gk20a.h"
#include "channel.h"

static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
{
	/*
	 * There is no local memory available;
	 * don't allow local-memory-related CE flags.
	 */
	if (!g->mm.vidmem.size) {
		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
			NVGPU_CE_DST_LOCATION_LOCAL_FB);
	}
	return launch_flags;
}

int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		struct gk20a_fence *gk20a_fence_in,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 fence_index;
	struct nvgpu_gpfifo gpfifo;
	struct nvgpu_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
	struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

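	/* Look up the CE context matching ce_ctx_id under the app lock. */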
	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

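	/*
	 * The command buffer is used as a ring of fixed-size kickoff slots:
	 * wrap the read offset back to the start once it reaches the end.
	 */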
	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;

	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));

	/*
	 * The tail of each kickoff slot holds a gk20a_fence pointer used for
	 * command buffer synchronization.
	 */
	fence_index = (cmd_buf_read_offset +
			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));

	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
		ret = -ENOMEM;
		goto noop;
	}

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

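	/*
	 * If a fence pointer from a previous kickoff is still stored in this
	 * slot, wait for that fence before reusing the slot so the GPU has
	 * finished consuming the old commands.
	 */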
	/* 0 is treated as invalid pre-sync */
	if (cmd_buf_cpu_va[fence_index]) {
		struct gk20a_fence *ce_cmd_buf_fence_in = NULL;

		memcpy((void *)&ce_cmd_buf_fence_in,
				(void *)(cmd_buf_cpu_va + fence_index),
				sizeof(struct gk20a_fence *));
		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(ce_cmd_buf_fence_in);

		/* Reset the stored last pre-sync */
		memset((void *)(cmd_buf_cpu_va + fence_index),
				0,
				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);

		if (ret)
			goto noop;
	}

	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va +
			(u64)(cmd_buf_read_offset * sizeof(u32)));

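	/*
	 * Encode the CE methods for the requested operation into this slot;
	 * gk20a_ce_prepare_submit() returns the size of the generated method
	 * stream (0 on failure).
	 */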
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			gpu_capability->dma_copy_class,
			gk20a_fence_in);

	if (methodSize) {
		/* TODO: Remove CPU pre-fence wait */
		if (gk20a_fence_in) {
			ret = gk20a_fence_wait(g, gk20a_fence_in,
					gk20a_get_gr_idle_timeout(g));
			gk20a_fence_put(gk20a_fence_in);
			if (ret)
				goto noop;
		}

		/* Pack the command buffer GPU VA and method length into a gpfifo entry. */
		gpfifo.entry0 = u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 = (u64_hi32(cmd_buf_gpu_va) |
				pbdma_gp_entry1_length_f(methodSize));

		/*
		 * Always request the post-fence; it is needed to protect the
		 * CE context.
		 */
		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

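		/*
		 * Make sure the CPU writes to the command buffer are visible
		 * before the gpfifo entry is submitted to the channel.
		 */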
		nvgpu_smp_wmb();

		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
				1, submit_flags, &fence,
				&ce_cmd_buf_fence_out, false, NULL);

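		/*
		 * On success, stash the post-fence pointer at the tail of the
		 * slot; it becomes the pre-sync fence waited on the next time
		 * this slot is reused.
		 */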
		if (!ret) {
			memcpy((void *)(cmd_buf_cpu_va + fence_index),
					(void *)&ce_cmd_buf_fence_out,
					sizeof(struct gk20a_fence *));

			if (gk20a_fence_out) {
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* Advance to the next command buffer slot. */
			++ce_ctx->cmd_buf_read_queue_offset;
			++ce_ctx->submitted_seq_number;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}