diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ce2.c | 187 |
1 file changed, 187 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c new file mode 100644 index 00000000..f172cede --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/ce2.c | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <uapi/linux/nvgpu.h> | ||
18 | |||
19 | #include <nvgpu/types.h> | ||
20 | |||
21 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
22 | |||
23 | #include "gk20a/ce2_gk20a.h" | ||
24 | #include "gk20a/gk20a.h" | ||
25 | #include "channel.h" | ||
26 | |||
27 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | ||
28 | { | ||
29 | /* there is no local memory available, | ||
30 | don't allow local memory related CE flags */ | ||
31 | if (!g->mm.vidmem.size) { | ||
32 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
33 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
34 | } | ||
35 | return launch_flags; | ||
36 | } | ||
37 | |||
/*
 * Build and submit one CE (copy engine) operation on a previously
 * allocated CE context identified by @ce_ctx_id.
 *
 * The context owns a ring of fixed-size per-kickoff command buffer
 * slots (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF bytes each); the
 * tail NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING bytes of every slot
 * hold a raw gk20a_fence pointer for that slot's previous submission,
 * which is waited on and cleared before the slot is reused.
 *
 * @g:                  GPU device instance
 * @ce_ctx_id:          id of the CE context to submit on
 * @src_buf/@dst_buf:   GPU VAs of the transfer endpoints
 * @size:               transfer size in bytes
 * @payload:            payload value passed to the method builder
 * @launch_flags:       CE launch flags (sanitized for missing vidmem)
 * @request_operation:  operation selector for the method builder
 * @gk20a_fence_in:     optional pre-fence; consumed (put) on the CPU
 *                      wait path below
 * @submit_flags:       gpfifo submit flags; FLAGS_FENCE_GET is forced on
 * @gk20a_fence_out:    optional out-param receiving a reference to the
 *                      submission's post-fence
 *
 * Returns 0 on success; -EPERM if the CE app is not initialised/active,
 * -EINVAL if no context matches @ce_ctx_id, -ENODEV if the context is
 * not in the ALLOCATED state, -ENOMEM if the fence-tracing area cannot
 * hold a pointer or method building fails, or a negative error from
 * the fence wait / gpfifo submit.
 */
int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		struct gk20a_fence *gk20a_fence_in,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 fence_index;
	u32 dma_copy_class;
	struct nvgpu_gpfifo gpfifo;
	struct nvgpu_fence fence = {0,0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	/* Reject submissions while the CE app is down or shutting down. */
	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	/* Look the context up by id in the app's allocated-contexts list. */
	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	/*
	 * NOTE(review): ce_ctx is dereferenced below after app_mutex is
	 * dropped; presumably the caller owning ce_ctx_id keeps the
	 * context alive across this call — confirm against the deletion
	 * path.
	 */
	nvgpu_mutex_release(&ce_app->app_mutex);

	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	/* Wrap the ring index back to slot 0 past the last slot. */
	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;

	/* Convert the slot index into a u32-word offset into the buffer. */
	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));

	/* at end of command buffer has gk20a_fence for command buffer sync */
	fence_index = (cmd_buf_read_offset +
			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));

	/* The tracing area must be large enough to store a fence pointer. */
	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
		ret = -ENOMEM;
		goto noop;
	}

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	/* 0 is treated as invalid pre-sync */
	if (cmd_buf_cpu_va[fence_index]) {
		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;

		/*
		 * Recover the fence pointer stashed at the slot tail by
		 * the previous submission and wait for it, so the slot's
		 * command words are no longer in flight before reuse.
		 */
		memcpy((void *)&ce_cmd_buf_fence_in,
				(void *)(cmd_buf_cpu_va + fence_index),
				sizeof(struct gk20a_fence *));
		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(ce_cmd_buf_fence_in);
		/* Reset the stored last pre-sync */
		memset((void *)(cmd_buf_cpu_va + fence_index),
				0,
				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
		if (ret)
			goto noop;
	}

	/* GPU VA of this slot, for the gpfifo entry below. */
	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));

	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	/* Emit the CE methods into the slot; 0 means nothing was built. */
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class,
			gk20a_fence_in);

	if (methodSize) {
		/* TODO: Remove CPU pre-fence wait */
		if (gk20a_fence_in) {
			/* Consumes the caller's pre-fence reference. */
			ret = gk20a_fence_wait(g, gk20a_fence_in,
					gk20a_get_gr_idle_timeout(g));
			gk20a_fence_put(gk20a_fence_in);
			if (ret)
				goto noop;
		}

		/* store the element into gpfifo */
		gpfifo.entry0 =
			u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 =
			(u64_hi32(cmd_buf_gpu_va) |
			pbdma_gp_entry1_length_f(methodSize));

		/* take always the postfence as it is needed for protecting the ce context */
		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

		/*
		 * Publish the command words before the gpfifo entry is
		 * handed to the channel.
		 */
		nvgpu_smp_wmb();

		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
				1, submit_flags, &fence,
				&ce_cmd_buf_fence_out, false, NULL);

		if (!ret) {
			/*
			 * Stash the post-fence pointer at the slot tail so
			 * the next reuse of this slot can wait on it.
			 */
			memcpy((void *)(cmd_buf_cpu_va + fence_index),
					(void *)&ce_cmd_buf_fence_out,
					sizeof(struct gk20a_fence *));

			if (gk20a_fence_out) {
				/* Extra reference for the caller's copy. */
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* Next available command buffer queue Index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}