diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 617 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | 124 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 86 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/pci.c | 2 |
14 files changed, 896 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 4b84dc69..f5b68e72 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -1186,7 +1186,9 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) | |||
1186 | } | 1186 | } |
1187 | 1187 | ||
1188 | ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, | 1188 | ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, |
1189 | cde_ctx); | 1189 | cde_ctx, |
1190 | -1, | ||
1191 | false); | ||
1190 | if (!ch) { | 1192 | if (!ch) { |
1191 | gk20a_warn(cde_ctx->dev, "cde: gk20a channel not available"); | 1193 | gk20a_warn(cde_ctx->dev, "cde: gk20a channel not available"); |
1192 | err = -ENOMEM; | 1194 | err = -ENOMEM; |
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 96d38b11..e2f2d9e9 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <trace/events/gk20a.h> | 24 | #include <trace/events/gk20a.h> |
25 | #include <linux/dma-mapping.h> | 25 | #include <linux/dma-mapping.h> |
26 | #include <linux/nvhost.h> | 26 | #include <linux/nvhost.h> |
27 | #include <linux/debugfs.h> | ||
27 | 28 | ||
28 | #include "gk20a.h" | 29 | #include "gk20a.h" |
29 | #include "debug_gk20a.h" | 30 | #include "debug_gk20a.h" |
@@ -96,3 +97,619 @@ void gk20a_init_ce2(struct gpu_ops *gops) | |||
96 | gops->ce2.isr_stall = gk20a_ce2_isr; | 97 | gops->ce2.isr_stall = gk20a_ce2_isr; |
97 | gops->ce2.isr_nonstall = gk20a_ce2_nonstall_isr; | 98 | gops->ce2.isr_nonstall = gk20a_ce2_nonstall_isr; |
98 | } | 99 | } |
100 | |||
101 | /* static CE app api */ | ||
102 | static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event) | ||
103 | { | ||
104 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
105 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
106 | |||
107 | if (!ce_app->initialised) | ||
108 | return; | ||
109 | |||
110 | mutex_lock(&ce_app->app_mutex); | ||
111 | |||
112 | list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
113 | &ce_app->allocated_contexts, list) { | ||
114 | if (ce_ctx->user_event_callback) { | ||
115 | ce_ctx->user_event_callback(ce_ctx->ctx_id, | ||
116 | event); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | mutex_unlock(&ce_app->app_mutex); | ||
121 | } | ||
122 | |||
123 | static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data) | ||
124 | { | ||
125 | struct gk20a_gpu_ctx *ce_ctx = data; | ||
126 | bool channel_idle; | ||
127 | u32 event; | ||
128 | |||
129 | mutex_lock(&ch->jobs_lock); | ||
130 | channel_idle = list_empty(&ch->jobs); | ||
131 | mutex_unlock(&ch->jobs_lock); | ||
132 | |||
133 | if (!channel_idle) | ||
134 | return; | ||
135 | |||
136 | gk20a_dbg(gpu_dbg_fn, "ce: finished %p", ce_ctx); | ||
137 | |||
138 | if (ch->has_timedout) | ||
139 | event = NVGPU_CE_CONTEXT_JOB_TIMEDOUT; | ||
140 | else | ||
141 | event = NVGPU_CE_CONTEXT_JOB_COMPLETED; | ||
142 | |||
143 | if (ce_ctx->user_event_callback) | ||
144 | ce_ctx->user_event_callback(ce_ctx->ctx_id, | ||
145 | event); | ||
146 | |||
147 | ++ce_ctx->completed_seq_number; | ||
148 | } | ||
149 | |||
150 | static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx) | ||
151 | { | ||
152 | u32 cmd_buf_index; | ||
153 | u32 cmd_buf_read_offset; | ||
154 | u32 fence_index; | ||
155 | u32 *cmd_buf_cpu_va; | ||
156 | |||
157 | for (cmd_buf_index = 0; | ||
158 | cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset; | ||
159 | cmd_buf_index++) { | ||
160 | cmd_buf_read_offset = (cmd_buf_index * | ||
161 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | ||
162 | |||
163 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
164 | fence_index = (cmd_buf_read_offset + | ||
165 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
166 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
167 | |||
168 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | ||
169 | |||
170 | /* 0 is treated as invalid pre-sync */ | ||
171 | if (cmd_buf_cpu_va[fence_index]) { | ||
172 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | ||
173 | |||
174 | memcpy((void *)&ce_cmd_buf_fence_in, | ||
175 | (void *)(cmd_buf_cpu_va + fence_index), | ||
176 | sizeof(struct gk20a_fence *)); | ||
177 | gk20a_fence_put(ce_cmd_buf_fence_in); | ||
178 | /* Reset the stored last pre-sync */ | ||
179 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
180 | 0, | ||
181 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
182 | } | ||
183 | } | ||
184 | } | ||
185 | |||
186 | /* assume this api should need to call under mutex_lock(&ce_app->app_mutex) */ | ||
187 | static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) | ||
188 | { | ||
189 | ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED; | ||
190 | |||
191 | mutex_lock(&ce_ctx->gpu_ctx_mutex); | ||
192 | |||
193 | gk20a_ce_free_command_buffer_stored_fence(ce_ctx); | ||
194 | |||
195 | gk20a_gmmu_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); | ||
196 | |||
197 | /* free the channel */ | ||
198 | if (ce_ctx->ch) | ||
199 | gk20a_channel_close(ce_ctx->ch); | ||
200 | |||
201 | /* housekeeping on app */ | ||
202 | list_del(&ce_ctx->list); | ||
203 | |||
204 | mutex_unlock(&ce_ctx->gpu_ctx_mutex); | ||
205 | mutex_destroy(&ce_ctx->gpu_ctx_mutex); | ||
206 | |||
207 | kfree(ce_ctx); | ||
208 | } | ||
209 | |||
210 | static inline int gk20a_ce_get_method_size(int request_operation) | ||
211 | { | ||
212 | /* failure size */ | ||
213 | int methodsize = ~0; | ||
214 | |||
215 | if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) | ||
216 | methodsize = 10 * 2 * sizeof(u32); | ||
217 | else if (request_operation & NVGPU_CE_MEMSET) | ||
218 | methodsize = 9 * 2 * sizeof(u32); | ||
219 | |||
220 | return methodsize; | ||
221 | } | ||
222 | |||
223 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | ||
224 | { | ||
225 | /* there is no local memory available, | ||
226 | don't allow local memory related CE flags */ | ||
227 | if (!g->mm.vidmem_size) { | ||
228 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
229 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
230 | } | ||
231 | return launch_flags; | ||
232 | } | ||
233 | |||
234 | static int gk20a_ce_prepare_submit(u64 src_buf, | ||
235 | u64 dst_buf, | ||
236 | u64 size, | ||
237 | u32 *cmd_buf_cpu_va, | ||
238 | u32 max_cmd_buf_size, | ||
239 | unsigned int payload, | ||
240 | int launch_flags, | ||
241 | int request_operation, | ||
242 | u32 dma_copy_class, | ||
243 | struct gk20a_fence *gk20a_fence_in) | ||
244 | { | ||
245 | u32 launch = 0; | ||
246 | u32 methodSize = 0; | ||
247 | |||
248 | /* failure case handling */ | ||
249 | if ((gk20a_ce_get_method_size(request_operation) > max_cmd_buf_size) || | ||
250 | (!size) || | ||
251 | (request_operation > NVGPU_CE_MEMSET)) | ||
252 | return 0; | ||
253 | |||
254 | /* set the channel object */ | ||
255 | cmd_buf_cpu_va[methodSize++] = 0x20018000; | ||
256 | cmd_buf_cpu_va[methodSize++] = dma_copy_class; | ||
257 | |||
258 | if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) { | ||
259 | /* setup the source */ | ||
260 | cmd_buf_cpu_va[methodSize++] = 0x20018101; | ||
261 | cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf) & | ||
262 | NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK); | ||
263 | |||
264 | cmd_buf_cpu_va[methodSize++] = 0x20018100; | ||
265 | cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf) & | ||
266 | NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK); | ||
267 | |||
268 | cmd_buf_cpu_va[methodSize++] = 0x20018098; | ||
269 | if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) { | ||
270 | cmd_buf_cpu_va[methodSize++] = 0x00000000; | ||
271 | } else if (launch_flags & NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) { | ||
272 | cmd_buf_cpu_va[methodSize++] = 0x00000002; | ||
273 | } else { | ||
274 | cmd_buf_cpu_va[methodSize++] = 0x00000001; | ||
275 | } | ||
276 | |||
277 | launch |= 0x00001000; | ||
278 | } else if (request_operation & NVGPU_CE_MEMSET) { | ||
279 | cmd_buf_cpu_va[methodSize++] = 0x200181c2; | ||
280 | cmd_buf_cpu_va[methodSize++] = 0x00030004; | ||
281 | |||
282 | cmd_buf_cpu_va[methodSize++] = 0x200181c0; | ||
283 | cmd_buf_cpu_va[methodSize++] = payload; | ||
284 | |||
285 | launch |= 0x00000400; | ||
286 | |||
287 | /* converted into number of words */ | ||
288 | size /= sizeof(u32); | ||
289 | } | ||
290 | |||
291 | /* setup the destination/output */ | ||
292 | cmd_buf_cpu_va[methodSize++] = 0x20018103; | ||
293 | cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK); | ||
294 | |||
295 | cmd_buf_cpu_va[methodSize++] = 0x20018102; | ||
296 | cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK); | ||
297 | |||
298 | cmd_buf_cpu_va[methodSize++] = 0x20018099; | ||
299 | if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) { | ||
300 | cmd_buf_cpu_va[methodSize++] = 0x00000000; | ||
301 | } else if (launch_flags & NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) { | ||
302 | cmd_buf_cpu_va[methodSize++] = 0x00000002; | ||
303 | } else { | ||
304 | cmd_buf_cpu_va[methodSize++] = 0x00000001; | ||
305 | } | ||
306 | |||
307 | launch |= 0x00002000; | ||
308 | |||
309 | /* setup the format */ | ||
310 | cmd_buf_cpu_va[methodSize++] = 0x20018107; | ||
311 | cmd_buf_cpu_va[methodSize++] = 1; | ||
312 | cmd_buf_cpu_va[methodSize++] = 0x20018106; | ||
313 | cmd_buf_cpu_va[methodSize++] = u64_lo32(size); | ||
314 | |||
315 | launch |= 0x00000004; | ||
316 | |||
317 | if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) | ||
318 | launch |= 0x00000000; | ||
319 | else | ||
320 | launch |= 0x00000080; | ||
321 | |||
322 | if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) | ||
323 | launch |= 0x00000000; | ||
324 | else | ||
325 | launch |= 0x00000100; | ||
326 | |||
327 | if (launch_flags & NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED) | ||
328 | launch |= 0x00000002; | ||
329 | else | ||
330 | launch |= 0x00000001; | ||
331 | |||
332 | cmd_buf_cpu_va[methodSize++] = 0x200180c0; | ||
333 | cmd_buf_cpu_va[methodSize++] = launch; | ||
334 | |||
335 | return methodSize; | ||
336 | } | ||
337 | |||
338 | /* global CE app related apis */ | ||
339 | int gk20a_init_ce_support(struct gk20a *g) | ||
340 | { | ||
341 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
342 | |||
343 | if (ce_app->initialised) { | ||
344 | /* assume this happen during poweron/poweroff GPU sequence */ | ||
345 | ce_app->app_state = NVGPU_CE_ACTIVE; | ||
346 | gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_RESUME); | ||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | gk20a_dbg(gpu_dbg_fn, "ce: init"); | ||
351 | |||
352 | mutex_init(&ce_app->app_mutex); | ||
353 | mutex_lock(&ce_app->app_mutex); | ||
354 | |||
355 | INIT_LIST_HEAD(&ce_app->allocated_contexts); | ||
356 | ce_app->ctx_count = 0; | ||
357 | ce_app->next_ctx_id = 0; | ||
358 | ce_app->initialised = true; | ||
359 | ce_app->app_state = NVGPU_CE_ACTIVE; | ||
360 | |||
361 | mutex_unlock(&ce_app->app_mutex); | ||
362 | gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished"); | ||
363 | |||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | void gk20a_ce_destroy(struct gk20a *g) | ||
368 | { | ||
369 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
370 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
371 | |||
372 | if (!ce_app->initialised) | ||
373 | return; | ||
374 | |||
375 | ce_app->app_state = NVGPU_CE_SUSPEND; | ||
376 | ce_app->initialised = false; | ||
377 | |||
378 | mutex_lock(&ce_app->app_mutex); | ||
379 | |||
380 | list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
381 | &ce_app->allocated_contexts, list) { | ||
382 | gk20a_ce_delete_gpu_context(ce_ctx); | ||
383 | } | ||
384 | |||
385 | INIT_LIST_HEAD(&ce_app->allocated_contexts); | ||
386 | ce_app->ctx_count = 0; | ||
387 | ce_app->next_ctx_id = 0; | ||
388 | |||
389 | mutex_unlock(&ce_app->app_mutex); | ||
390 | mutex_destroy(&ce_app->app_mutex); | ||
391 | } | ||
392 | |||
393 | void gk20a_ce_suspend(struct gk20a *g) | ||
394 | { | ||
395 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
396 | |||
397 | if (!ce_app->initialised) | ||
398 | return; | ||
399 | |||
400 | ce_app->app_state = NVGPU_CE_SUSPEND; | ||
401 | gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_SUSPEND); | ||
402 | |||
403 | return; | ||
404 | } | ||
405 | |||
406 | /* CE app utility functions */ | ||
407 | u32 gk20a_ce_create_context_with_cb(struct device *dev, | ||
408 | int runlist_id, | ||
409 | int priority, | ||
410 | int timeslice, | ||
411 | int runlist_level, | ||
412 | ce_event_callback user_event_callback) | ||
413 | { | ||
414 | struct gk20a_gpu_ctx *ce_ctx; | ||
415 | struct gk20a *g = gk20a_from_dev(dev); | ||
416 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
417 | u32 ctx_id = ~0; | ||
418 | int err = 0; | ||
419 | |||
420 | if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) | ||
421 | return ctx_id; | ||
422 | |||
423 | ce_ctx = kzalloc(sizeof(*ce_ctx), GFP_KERNEL); | ||
424 | if (!ce_ctx) | ||
425 | return ctx_id; | ||
426 | |||
427 | mutex_init(&ce_ctx->gpu_ctx_mutex); | ||
428 | |||
429 | ce_ctx->g = g; | ||
430 | ce_ctx->dev = g->dev; | ||
431 | ce_ctx->user_event_callback = user_event_callback; | ||
432 | |||
433 | ce_ctx->cmd_buf_read_queue_offset = 0; | ||
434 | ce_ctx->cmd_buf_end_queue_offset = | ||
435 | (NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF); | ||
436 | |||
437 | ce_ctx->submitted_seq_number = 0; | ||
438 | ce_ctx->completed_seq_number = 0; | ||
439 | |||
440 | /* always kernel client needs privileged channel */ | ||
441 | ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb, | ||
442 | ce_ctx, | ||
443 | runlist_id, | ||
444 | true); | ||
445 | if (!ce_ctx->ch) { | ||
446 | gk20a_err(ce_ctx->dev, "ce: gk20a channel not available"); | ||
447 | goto end; | ||
448 | } | ||
449 | |||
450 | /* bind the channel to the vm */ | ||
451 | gk20a_vm_get(&g->mm.ce.vm); | ||
452 | ce_ctx->vm = ce_ctx->ch->vm = &g->mm.ce.vm; | ||
453 | err = channel_gk20a_commit_va(ce_ctx->ch); | ||
454 | if (err) { | ||
455 | gk20a_err(ce_ctx->dev, "ce: could not bind vm"); | ||
456 | goto end; | ||
457 | } | ||
458 | |||
459 | /* allocate gpfifo (1024 should be more than enough) */ | ||
460 | err = gk20a_alloc_channel_gpfifo(ce_ctx->ch, | ||
461 | &(struct nvgpu_alloc_gpfifo_args){1024, 0}); | ||
462 | if (err) { | ||
463 | gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo"); | ||
464 | goto end; | ||
465 | } | ||
466 | |||
467 | /* allocate command buffer (4096 should be more than enough) from sysmem*/ | ||
468 | err = gk20a_gmmu_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem); | ||
469 | if (err) { | ||
470 | gk20a_err(ce_ctx->dev, | ||
471 | "ce: could not allocate command buffer for CE context"); | ||
472 | goto end; | ||
473 | } | ||
474 | |||
475 | memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size); | ||
476 | |||
477 | /* -1 means default channel priority */ | ||
478 | if (priority != -1) { | ||
479 | err = gk20a_channel_set_priority(ce_ctx->ch, priority); | ||
480 | if (err) { | ||
481 | gk20a_err(ce_ctx->dev, | ||
482 | "ce: could not set the channel priority for CE context"); | ||
483 | goto end; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* -1 means default channel timeslice value */ | ||
488 | if (timeslice != -1) { | ||
489 | err = gk20a_channel_set_timeslice(ce_ctx->ch, timeslice); | ||
490 | if (err) { | ||
491 | gk20a_err(ce_ctx->dev, | ||
492 | "ce: could not set the channel timeslice value for CE context"); | ||
493 | goto end; | ||
494 | } | ||
495 | } | ||
496 | |||
497 | /* -1 means default channel runlist level */ | ||
498 | if (runlist_level != -1) { | ||
499 | err = gk20a_channel_set_runlist_interleave(ce_ctx->ch, runlist_level); | ||
500 | if (err) { | ||
501 | gk20a_err(ce_ctx->dev, | ||
502 | "ce: could not set the runlist interleave for CE context"); | ||
503 | goto end; | ||
504 | } | ||
505 | } | ||
506 | |||
507 | mutex_lock(&ce_app->app_mutex); | ||
508 | ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id; | ||
509 | list_add(&ce_ctx->list, &ce_app->allocated_contexts); | ||
510 | ++ce_app->next_ctx_id; | ||
511 | ++ce_app->ctx_count; | ||
512 | mutex_unlock(&ce_app->app_mutex); | ||
513 | |||
514 | ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED; | ||
515 | |||
516 | end: | ||
517 | if (ctx_id == ~0) { | ||
518 | mutex_lock(&ce_app->app_mutex); | ||
519 | gk20a_ce_delete_gpu_context(ce_ctx); | ||
520 | mutex_unlock(&ce_app->app_mutex); | ||
521 | } | ||
522 | return ctx_id; | ||
523 | |||
524 | } | ||
525 | EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); | ||
526 | |||
527 | int gk20a_ce_execute_ops(struct device *dev, | ||
528 | u32 ce_ctx_id, | ||
529 | u64 src_buf, | ||
530 | u64 dst_buf, | ||
531 | u64 size, | ||
532 | unsigned int payload, | ||
533 | int launch_flags, | ||
534 | int request_operation, | ||
535 | struct gk20a_fence *gk20a_fence_in, | ||
536 | u32 submit_flags, | ||
537 | struct gk20a_fence **gk20a_fence_out) | ||
538 | { | ||
539 | int ret = -EPERM; | ||
540 | struct gk20a *g = gk20a_from_dev(dev); | ||
541 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
542 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
543 | bool found = false; | ||
544 | u32 *cmd_buf_cpu_va; | ||
545 | u64 cmd_buf_gpu_va = 0; | ||
546 | u32 methodSize; | ||
547 | u32 cmd_buf_read_offset; | ||
548 | u32 fence_index; | ||
549 | struct nvgpu_gpfifo gpfifo; | ||
550 | struct nvgpu_fence fence = {0,0}; | ||
551 | struct gk20a_fence *ce_cmd_buf_fence_out = NULL; | ||
552 | struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics; | ||
553 | |||
554 | if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) | ||
555 | goto end; | ||
556 | |||
557 | mutex_lock(&ce_app->app_mutex); | ||
558 | |||
559 | list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
560 | &ce_app->allocated_contexts, list) { | ||
561 | if (ce_ctx->ctx_id == ce_ctx_id) { | ||
562 | found = true; | ||
563 | break; | ||
564 | } | ||
565 | } | ||
566 | |||
567 | mutex_unlock(&ce_app->app_mutex); | ||
568 | |||
569 | if (!found) { | ||
570 | ret = -EINVAL; | ||
571 | goto end; | ||
572 | } | ||
573 | |||
574 | if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { | ||
575 | ret = -ENODEV; | ||
576 | goto end; | ||
577 | } | ||
578 | |||
579 | mutex_lock(&ce_ctx->gpu_ctx_mutex); | ||
580 | |||
581 | ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; | ||
582 | |||
583 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * | ||
584 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | ||
585 | |||
586 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
587 | fence_index = (cmd_buf_read_offset + | ||
588 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
589 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
590 | |||
591 | if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) { | ||
592 | ret = -ENOMEM; | ||
593 | goto noop; | ||
594 | } | ||
595 | |||
596 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | ||
597 | |||
598 | /* 0 is treated as invalid pre-sync */ | ||
599 | if (cmd_buf_cpu_va[fence_index]) { | ||
600 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | ||
601 | |||
602 | memcpy((void *)&ce_cmd_buf_fence_in, | ||
603 | (void *)(cmd_buf_cpu_va + fence_index), | ||
604 | sizeof(struct gk20a_fence *)); | ||
605 | ret = gk20a_fence_wait(ce_cmd_buf_fence_in, gk20a_get_gr_idle_timeout(g)); | ||
606 | |||
607 | gk20a_fence_put(ce_cmd_buf_fence_in); | ||
608 | /* Reset the stored last pre-sync */ | ||
609 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
610 | 0, | ||
611 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
612 | if (ret) | ||
613 | goto noop; | ||
614 | } | ||
615 | |||
616 | cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); | ||
617 | |||
618 | methodSize = gk20a_ce_prepare_submit(src_buf, | ||
619 | dst_buf, | ||
620 | size, | ||
621 | &cmd_buf_cpu_va[cmd_buf_read_offset], | ||
622 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, | ||
623 | payload, | ||
624 | gk20a_get_valid_launch_flags(g, launch_flags), | ||
625 | request_operation, | ||
626 | gpu_capability->dma_copy_class, | ||
627 | gk20a_fence_in); | ||
628 | |||
629 | if (methodSize) { | ||
630 | /* TODO: Remove CPU pre-fence wait */ | ||
631 | if (gk20a_fence_in) { | ||
632 | ret = gk20a_fence_wait(gk20a_fence_in, gk20a_get_gr_idle_timeout(g)); | ||
633 | gk20a_fence_put(gk20a_fence_in); | ||
634 | if (ret) | ||
635 | goto noop; | ||
636 | } | ||
637 | |||
638 | /* store the element into gpfifo */ | ||
639 | gpfifo.entry0 = | ||
640 | u64_lo32(cmd_buf_gpu_va); | ||
641 | gpfifo.entry1 = | ||
642 | (u64_hi32(cmd_buf_gpu_va) | | ||
643 | pbdma_gp_entry1_length_f(methodSize)); | ||
644 | |||
645 | /* take always the postfence as it is needed for protecting the ce context */ | ||
646 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
647 | |||
648 | wmb(); | ||
649 | |||
650 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | ||
651 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out, true); | ||
652 | |||
653 | if (!ret) { | ||
654 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | ||
655 | (void *)&ce_cmd_buf_fence_out, | ||
656 | sizeof(struct gk20a_fence *)); | ||
657 | |||
658 | if (gk20a_fence_out) { | ||
659 | gk20a_fence_get(ce_cmd_buf_fence_out); | ||
660 | *gk20a_fence_out = ce_cmd_buf_fence_out; | ||
661 | } | ||
662 | |||
663 | /* Next available command buffer queue Index */ | ||
664 | ++ce_ctx->cmd_buf_read_queue_offset; | ||
665 | ++ce_ctx->submitted_seq_number; | ||
666 | } | ||
667 | } else | ||
668 | ret = -ENOMEM; | ||
669 | noop: | ||
670 | mutex_unlock(&ce_ctx->gpu_ctx_mutex); | ||
671 | end: | ||
672 | return ret; | ||
673 | } | ||
674 | EXPORT_SYMBOL(gk20a_ce_execute_ops); | ||
675 | |||
676 | void gk20a_ce_delete_context(struct device *dev, | ||
677 | u32 ce_ctx_id) | ||
678 | { | ||
679 | struct gk20a *g = gk20a_from_dev(dev); | ||
680 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
681 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
682 | |||
683 | if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) | ||
684 | return; | ||
685 | |||
686 | mutex_lock(&ce_app->app_mutex); | ||
687 | |||
688 | list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
689 | &ce_app->allocated_contexts, list) { | ||
690 | if (ce_ctx->ctx_id == ce_ctx_id) { | ||
691 | gk20a_ce_delete_gpu_context(ce_ctx); | ||
692 | --ce_app->ctx_count; | ||
693 | break; | ||
694 | } | ||
695 | } | ||
696 | |||
697 | mutex_unlock(&ce_app->app_mutex); | ||
698 | return; | ||
699 | } | ||
700 | EXPORT_SYMBOL(gk20a_ce_delete_context); | ||
701 | |||
702 | #ifdef CONFIG_DEBUG_FS | ||
703 | void gk20a_ce_debugfs_init(struct device *dev) | ||
704 | { | ||
705 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
706 | struct gk20a *g = get_gk20a(dev); | ||
707 | |||
708 | debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO, | ||
709 | platform->debugfs, &g->ce_app.ctx_count); | ||
710 | debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO, | ||
711 | platform->debugfs, &g->ce_app.app_state); | ||
712 | debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO, | ||
713 | platform->debugfs, &g->ce_app.next_ctx_id); | ||
714 | } | ||
715 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 5ceb69e1..3b53834d 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | |||
@@ -28,4 +28,128 @@ void gk20a_init_ce2(struct gpu_ops *gops); | |||
28 | void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base); | 28 | void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base); |
29 | void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); | 29 | void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); |
30 | 30 | ||
31 | /* CE command utility macros */ | ||
32 | #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff | ||
33 | #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff | ||
34 | |||
35 | #define NVGPU_CE_COMMAND_BUF_SIZE 4096 | ||
36 | #define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 128 | ||
37 | #define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8 | ||
38 | |||
39 | typedef void (*ce_event_callback)(u32 ce_ctx_id, u32 ce_event_flag); | ||
40 | |||
41 | /* dma launch_flags */ | ||
42 | enum { | ||
43 | /* location */ | ||
44 | NVGPU_CE_SRC_LOCATION_COHERENT_SYSMEM = (1 << 0), | ||
45 | NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM = (1 << 1), | ||
46 | NVGPU_CE_SRC_LOCATION_LOCAL_FB = (1 << 2), | ||
47 | NVGPU_CE_DST_LOCATION_COHERENT_SYSMEM = (1 << 3), | ||
48 | NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM = (1 << 4), | ||
49 | NVGPU_CE_DST_LOCATION_LOCAL_FB = (1 << 5), | ||
50 | |||
51 | /* memory layout */ | ||
52 | NVGPU_CE_SRC_MEMORY_LAYOUT_PITCH = (1 << 6), | ||
53 | NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 7), | ||
54 | NVGPU_CE_DST_MEMORY_LAYOUT_PITCH = (1 << 8), | ||
55 | NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 9), | ||
56 | |||
57 | /* transfer type */ | ||
58 | NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED = (1 << 10), | ||
59 | NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED = (1 << 11), | ||
60 | }; | ||
61 | |||
62 | /* CE operation mode */ | ||
63 | enum { | ||
64 | NVGPU_CE_PHYS_MODE_TRANSFER = (1 << 0), | ||
65 | NVGPU_CE_MEMSET = (1 << 1), | ||
66 | }; | ||
67 | |||
68 | /* CE event flags */ | ||
69 | enum { | ||
70 | NVGPU_CE_CONTEXT_JOB_COMPLETED = (1 << 0), | ||
71 | NVGPU_CE_CONTEXT_JOB_TIMEDOUT = (1 << 1), | ||
72 | NVGPU_CE_CONTEXT_SUSPEND = (1 << 2), | ||
73 | NVGPU_CE_CONTEXT_RESUME = (1 << 3), | ||
74 | }; | ||
75 | |||
76 | /* CE app state machine flags */ | ||
77 | enum { | ||
78 | NVGPU_CE_ACTIVE = (1 << 0), | ||
79 | NVGPU_CE_SUSPEND = (1 << 1), | ||
80 | }; | ||
81 | |||
82 | /* gpu context state machine flags */ | ||
83 | enum { | ||
84 | NVGPU_CE_GPU_CTX_ALLOCATED = (1 << 0), | ||
85 | NVGPU_CE_GPU_CTX_DELETED = (1 << 1), | ||
86 | }; | ||
87 | |||
88 | /* global ce app db */ | ||
89 | struct gk20a_ce_app { | ||
90 | bool initialised; | ||
91 | struct mutex app_mutex; | ||
92 | int app_state; | ||
93 | |||
94 | struct list_head allocated_contexts; | ||
95 | u32 ctx_count; | ||
96 | u32 next_ctx_id; | ||
97 | }; | ||
98 | |||
99 | /* ce context db */ | ||
100 | struct gk20a_gpu_ctx { | ||
101 | struct gk20a *g; | ||
102 | struct device *dev; | ||
103 | u32 ctx_id; | ||
104 | struct mutex gpu_ctx_mutex; | ||
105 | int gpu_ctx_state; | ||
106 | ce_event_callback user_event_callback; | ||
107 | |||
108 | /* channel related data */ | ||
109 | struct channel_gk20a *ch; | ||
110 | struct vm_gk20a *vm; | ||
111 | |||
112 | /* cmd buf mem_desc */ | ||
113 | struct mem_desc cmd_buf_mem; | ||
114 | |||
115 | struct list_head list; | ||
116 | |||
117 | u64 submitted_seq_number; | ||
118 | u64 completed_seq_number; | ||
119 | |||
120 | u32 cmd_buf_read_queue_offset; | ||
121 | u32 cmd_buf_end_queue_offset; | ||
122 | }; | ||
123 | |||
124 | /* global CE app related apis */ | ||
125 | int gk20a_init_ce_support(struct gk20a *g); | ||
126 | void gk20a_ce_suspend(struct gk20a *g); | ||
127 | void gk20a_ce_destroy(struct gk20a *g); | ||
128 | |||
129 | /* CE app utility functions */ | ||
130 | u32 gk20a_ce_create_context_with_cb(struct device *dev, | ||
131 | int runlist_id, | ||
132 | int priority, | ||
133 | int timeslice, | ||
134 | int runlist_level, | ||
135 | ce_event_callback user_event_callback); | ||
136 | int gk20a_ce_execute_ops(struct device *dev, | ||
137 | u32 ce_ctx_id, | ||
138 | u64 src_buf, | ||
139 | u64 dst_buf, | ||
140 | u64 size, | ||
141 | unsigned int payload, | ||
142 | int launch_flags, | ||
143 | int request_operation, | ||
144 | struct gk20a_fence *gk20a_fence_in, | ||
145 | u32 submit_flags, | ||
146 | struct gk20a_fence **gk20a_fence_out); | ||
147 | void gk20a_ce_delete_context(struct device *dev, | ||
148 | u32 ce_ctx_id); | ||
149 | |||
150 | #ifdef CONFIG_DEBUG_FS | ||
151 | /* CE app debugfs api */ | ||
152 | void gk20a_ce_debugfs_init(struct device *dev); | ||
153 | #endif | ||
154 | |||
31 | #endif /*__CE2_GK20A_H__*/ | 155 | #endif /*__CE2_GK20A_H__*/ |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index d5457d10..447fe86a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -702,7 +702,7 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch, | |||
702 | return 0; | 702 | return 0; |
703 | } | 703 | } |
704 | 704 | ||
705 | static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, | 705 | int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, |
706 | u32 level) | 706 | u32 level) |
707 | { | 707 | { |
708 | struct gk20a *g = ch->g; | 708 | struct gk20a *g = ch->g; |
@@ -1113,9 +1113,11 @@ static void gk20a_channel_update_runcb_fn(struct work_struct *work) | |||
1113 | 1113 | ||
1114 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | 1114 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, |
1115 | void (*update_fn)(struct channel_gk20a *, void *), | 1115 | void (*update_fn)(struct channel_gk20a *, void *), |
1116 | void *update_fn_data) | 1116 | void *update_fn_data, |
1117 | int runlist_id, | ||
1118 | bool is_privileged_channel) | ||
1117 | { | 1119 | { |
1118 | struct channel_gk20a *ch = gk20a_open_new_channel(g, -1, false); | 1120 | struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel); |
1119 | 1121 | ||
1120 | if (ch) { | 1122 | if (ch) { |
1121 | spin_lock(&ch->update_fn_lock); | 1123 | spin_lock(&ch->update_fn_lock); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 4b5fe1b3..971175f2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -265,7 +265,9 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, | |||
265 | bool is_privileged_channel); | 265 | bool is_privileged_channel); |
266 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, | 266 | struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, |
267 | void (*update_fn)(struct channel_gk20a *, void *), | 267 | void (*update_fn)(struct channel_gk20a *, void *), |
268 | void *update_fn_data); | 268 | void *update_fn_data, |
269 | int runlist_id, | ||
270 | bool is_privileged_channel); | ||
269 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); | 271 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); |
270 | 272 | ||
271 | int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | 273 | int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, |
@@ -295,6 +297,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
295 | int *__timeslice_timeout, int *__timeslice_scale); | 297 | int *__timeslice_timeout, int *__timeslice_scale); |
296 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority); | 298 | int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority); |
297 | int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice); | 299 | int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice); |
300 | int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch, | ||
301 | u32 level); | ||
298 | void gk20a_channel_event_id_post_event(struct channel_gk20a *ch, | 302 | void gk20a_channel_event_id_post_event(struct channel_gk20a *ch, |
299 | int event_id); | 303 | int event_id); |
300 | 304 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 5133f86a..3dd7cb02 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -165,6 +165,33 @@ u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g) | |||
165 | return reset_mask; | 165 | return reset_mask; |
166 | } | 166 | } |
167 | 167 | ||
168 | u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g) | ||
169 | { | ||
170 | u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g); | ||
171 | u32 engine_enum = ENGINE_INVAL_GK20A; | ||
172 | struct fifo_gk20a *f = NULL; | ||
173 | u32 engine_id_idx; | ||
174 | struct fifo_engine_info_gk20a *engine_info; | ||
175 | u32 active_engine_id = 0; | ||
176 | |||
177 | if (!g) | ||
178 | return ce_runlist_id; | ||
179 | |||
180 | f = &g->fifo; | ||
181 | |||
182 | for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) { | ||
183 | active_engine_id = f->active_engines_list[engine_id_idx]; | ||
184 | engine_info = &f->engine_info[active_engine_id]; | ||
185 | engine_enum = engine_info->engine_enum; | ||
186 | |||
187 | /* select last available ASYNC_CE if available */ ||
188 | if (engine_enum == ENGINE_ASYNC_CE_GK20A) | ||
189 | ce_runlist_id = engine_info->runlist_id; | ||
190 | } | ||
191 | |||
192 | return ce_runlist_id; | ||
193 | } | ||
194 | |||
168 | u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g) | 195 | u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g) |
169 | { | 196 | { |
170 | u32 gr_engine_cnt = 0; | 197 | u32 gr_engine_cnt = 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 3473bc78..33d6d39c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -244,6 +244,8 @@ u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g); | |||
244 | 244 | ||
245 | u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g); | 245 | u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g); |
246 | 246 | ||
247 | u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g); | ||
248 | |||
247 | u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g); | 249 | u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g); |
248 | 250 | ||
249 | bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id); | 251 | bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 50f67262..04f82033 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -773,6 +773,7 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
773 | { | 773 | { |
774 | struct gk20a *g = get_gk20a(dev); | 774 | struct gk20a *g = get_gk20a(dev); |
775 | int ret = 0; | 775 | int ret = 0; |
776 | struct gk20a_platform *platform = gk20a_get_platform(dev); | ||
776 | 777 | ||
777 | gk20a_dbg_fn(""); | 778 | gk20a_dbg_fn(""); |
778 | 779 | ||
@@ -786,6 +787,9 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
786 | /* cancel any pending cde work */ | 787 | /* cancel any pending cde work */ |
787 | gk20a_cde_suspend(g); | 788 | gk20a_cde_suspend(g); |
788 | 789 | ||
790 | if (platform->has_ce) | ||
791 | gk20a_ce_suspend(g); | ||
792 | |||
789 | ret = gk20a_channel_suspend(g); | 793 | ret = gk20a_channel_suspend(g); |
790 | if (ret) | 794 | if (ret) |
791 | goto done; | 795 | goto done; |
@@ -996,6 +1000,11 @@ int gk20a_pm_finalize_poweron(struct device *dev) | |||
996 | if (platform->has_cde) | 1000 | if (platform->has_cde) |
997 | gk20a_init_cde_support(g); | 1001 | gk20a_init_cde_support(g); |
998 | 1002 | ||
1003 | if (platform->has_ce) | ||
1004 | gk20a_init_ce_support(g); | ||
1005 | |||
1006 | gk20a_init_mm_ce_context(g); | ||
1007 | |||
999 | enable_irq(g->irq_stall); | 1008 | enable_irq(g->irq_stall); |
1000 | if (g->irq_stall != g->irq_nonstall) | 1009 | if (g->irq_stall != g->irq_nonstall) |
1001 | enable_irq(g->irq_nonstall); | 1010 | enable_irq(g->irq_nonstall); |
@@ -1658,6 +1667,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1658 | gk20a_pmu_debugfs_init(&dev->dev); | 1667 | gk20a_pmu_debugfs_init(&dev->dev); |
1659 | gk20a_railgating_debugfs_init(&dev->dev); | 1668 | gk20a_railgating_debugfs_init(&dev->dev); |
1660 | gk20a_cde_debugfs_init(&dev->dev); | 1669 | gk20a_cde_debugfs_init(&dev->dev); |
1670 | gk20a_ce_debugfs_init(&dev->dev); | ||
1661 | gk20a_alloc_debugfs_init(dev); | 1671 | gk20a_alloc_debugfs_init(dev); |
1662 | gk20a_mm_debugfs_init(&dev->dev); | 1672 | gk20a_mm_debugfs_init(&dev->dev); |
1663 | gk20a_fifo_debugfs_init(&dev->dev); | 1673 | gk20a_fifo_debugfs_init(&dev->dev); |
@@ -1693,6 +1703,9 @@ static int __exit gk20a_remove(struct platform_device *pdev) | |||
1693 | if (g->remove_support) | 1703 | if (g->remove_support) |
1694 | g->remove_support(dev); | 1704 | g->remove_support(dev); |
1695 | 1705 | ||
1706 | if (platform->has_ce) | ||
1707 | gk20a_ce_destroy(g); | ||
1708 | |||
1696 | gk20a_user_deinit(dev, &nvgpu_class); | 1709 | gk20a_user_deinit(dev, &nvgpu_class); |
1697 | 1710 | ||
1698 | debugfs_remove_recursive(platform->debugfs); | 1711 | debugfs_remove_recursive(platform->debugfs); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8aa8689b..03a698dc 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -864,6 +864,8 @@ struct gk20a { | |||
864 | 864 | ||
865 | struct nvgpu_bios bios; | 865 | struct nvgpu_bios bios; |
866 | struct debugfs_blob_wrapper bios_blob; | 866 | struct debugfs_blob_wrapper bios_blob; |
867 | |||
868 | struct gk20a_ce_app ce_app; | ||
867 | }; | 869 | }; |
868 | 870 | ||
869 | static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) | 871 | static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 750ce10c..7b2174bc 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -393,7 +393,7 @@ static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | |||
393 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | 393 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); |
394 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | 394 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); |
395 | static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); | 395 | static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); |
396 | 396 | static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm); | |
397 | 397 | ||
398 | struct gk20a_dmabuf_priv { | 398 | struct gk20a_dmabuf_priv { |
399 | struct mutex lock; | 399 | struct mutex lock; |
@@ -702,6 +702,7 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) | |||
702 | static void gk20a_remove_mm_support(struct mm_gk20a *mm) | 702 | static void gk20a_remove_mm_support(struct mm_gk20a *mm) |
703 | { | 703 | { |
704 | struct gk20a *g = gk20a_from_mm(mm); | 704 | struct gk20a *g = gk20a_from_mm(mm); |
705 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
705 | 706 | ||
706 | if (g->ops.mm.remove_bar2_vm) | 707 | if (g->ops.mm.remove_bar2_vm) |
707 | g->ops.mm.remove_bar2_vm(g); | 708 | g->ops.mm.remove_bar2_vm(g); |
@@ -709,6 +710,14 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) | |||
709 | gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); | 710 | gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); |
710 | gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); | 711 | gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); |
711 | gk20a_vm_remove_support_nofree(&mm->cde.vm); | 712 | gk20a_vm_remove_support_nofree(&mm->cde.vm); |
713 | |||
714 | if (mm->ce_vidmem_ctx_id != ~0) | ||
715 | gk20a_ce_delete_context(g->dev, mm->ce_vidmem_ctx_id ); | ||
716 | |||
717 | mm->ce_vidmem_ctx_id = ~0; | ||
718 | |||
719 | if (platform->has_ce) | ||
720 | gk20a_vm_remove_support_nofree(&mm->ce.vm); | ||
712 | } | 721 | } |
713 | 722 | ||
714 | static int gk20a_alloc_sysmem_flush(struct gk20a *g) | 723 | static int gk20a_alloc_sysmem_flush(struct gk20a *g) |
@@ -754,6 +763,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
754 | { | 763 | { |
755 | struct mm_gk20a *mm = &g->mm; | 764 | struct mm_gk20a *mm = &g->mm; |
756 | int err; | 765 | int err; |
766 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
757 | 767 | ||
758 | gk20a_dbg_fn(""); | 768 | gk20a_dbg_fn(""); |
759 | 769 | ||
@@ -775,6 +785,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
775 | 785 | ||
776 | gk20a_init_pramin(mm); | 786 | gk20a_init_pramin(mm); |
777 | 787 | ||
788 | mm->ce_vidmem_ctx_id = ~0; | ||
789 | |||
778 | err = gk20a_init_vidmem(mm); | 790 | err = gk20a_init_vidmem(mm); |
779 | if (err) | 791 | if (err) |
780 | return err; | 792 | return err; |
@@ -804,6 +816,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
804 | if (err) | 816 | if (err) |
805 | return err; | 817 | return err; |
806 | 818 | ||
819 | if (platform->has_ce) { | ||
820 | err = gk20a_init_ce_vm(mm); | ||
821 | if (err) | ||
822 | return err; | ||
823 | } | ||
824 | |||
807 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | 825 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ |
808 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | 826 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; |
809 | mm->remove_support = gk20a_remove_mm_support; | 827 | mm->remove_support = gk20a_remove_mm_support; |
@@ -881,6 +899,25 @@ int gk20a_init_mm_support(struct gk20a *g) | |||
881 | return err; | 899 | return err; |
882 | } | 900 | } |
883 | 901 | ||
902 | void gk20a_init_mm_ce_context(struct gk20a *g) | ||
903 | { | ||
904 | #if defined(CONFIG_GK20A_VIDMEM) | ||
905 | if (g->mm.vidmem_size && (g->mm.ce_vidmem_ctx_id == ~0)) { | ||
906 | g->mm.ce_vidmem_ctx_id = | ||
907 | gk20a_ce_create_context_with_cb(g->dev, | ||
908 | gk20a_fifo_get_fast_ce_runlist_id(g), | ||
909 | -1, | ||
910 | -1, | ||
911 | -1, | ||
912 | NULL); | ||
913 | |||
914 | if (g->mm.ce_vidmem_ctx_id == ~0) | ||
915 | gk20a_err(g->dev, | ||
916 | "Failed to allocate CE context for vidmem page clearing support"); | ||
917 | } | ||
918 | #endif | ||
919 | } | ||
920 | |||
884 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | 921 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, |
885 | struct gk20a_mm_entry *entry) | 922 | struct gk20a_mm_entry *entry) |
886 | { | 923 | { |
@@ -2484,6 +2521,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2484 | struct device *d = &g->mm.vidmem_dev; | 2521 | struct device *d = &g->mm.vidmem_dev; |
2485 | int err; | 2522 | int err; |
2486 | dma_addr_t iova; | 2523 | dma_addr_t iova; |
2524 | bool need_pramin_access = true; | ||
2487 | DEFINE_DMA_ATTRS(attrs); | 2525 | DEFINE_DMA_ATTRS(attrs); |
2488 | 2526 | ||
2489 | gk20a_dbg_fn(""); | 2527 | gk20a_dbg_fn(""); |
@@ -2519,7 +2557,38 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | |||
2519 | mem->size = size; | 2557 | mem->size = size; |
2520 | mem->aperture = APERTURE_VIDMEM; | 2558 | mem->aperture = APERTURE_VIDMEM; |
2521 | 2559 | ||
2522 | gk20a_memset(g, mem, 0, 0, size); | 2560 | if (g->mm.ce_vidmem_ctx_id != ~0) { |
2561 | struct gk20a_fence *gk20a_fence_out = NULL; | ||
2562 | u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
2563 | |||
2564 | err = gk20a_ce_execute_ops(g->dev, | ||
2565 | g->mm.ce_vidmem_ctx_id, | ||
2566 | 0, | ||
2567 | dst_bufbase, | ||
2568 | (u64)size, | ||
2569 | 0x00000000, | ||
2570 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | ||
2571 | NVGPU_CE_MEMSET, | ||
2572 | NULL, | ||
2573 | 0, | ||
2574 | &gk20a_fence_out); | ||
2575 | |||
2576 | if (!err) { | ||
2577 | if (gk20a_fence_out) { | ||
2578 | err = gk20a_fence_wait(gk20a_fence_out, gk20a_get_gr_idle_timeout(g)); | ||
2579 | gk20a_fence_put(gk20a_fence_out); | ||
2580 | if (err) | ||
2581 | gk20a_err(g->dev, | ||
2582 | "Failed to get the fence_out from CE execute ops"); | ||
2583 | else | ||
2584 | need_pramin_access = false; | ||
2585 | } | ||
2586 | } else | ||
2587 | gk20a_err(g->dev, "Failed gk20a_ce_execute_ops[%d]",err); | ||
2588 | } | ||
2589 | |||
2590 | if (need_pramin_access) | ||
2591 | gk20a_memset(g, mem, 0, 0, size); | ||
2523 | 2592 | ||
2524 | gk20a_dbg_fn("done"); | 2593 | gk20a_dbg_fn("done"); |
2525 | 2594 | ||
@@ -4125,6 +4194,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) | |||
4125 | false, false, "cde"); | 4194 | false, false, "cde"); |
4126 | } | 4195 | } |
4127 | 4196 | ||
4197 | static int gk20a_init_ce_vm(struct mm_gk20a *mm) | ||
4198 | { | ||
4199 | struct vm_gk20a *vm = &mm->ce.vm; | ||
4200 | struct gk20a *g = gk20a_from_mm(mm); | ||
4201 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | ||
4202 | |||
4203 | return gk20a_init_vm(mm, vm, big_page_size, | ||
4204 | SZ_4K * 16, | ||
4205 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
4206 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
4207 | false, false, "ce"); | ||
4208 | } | ||
4209 | |||
4128 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block, | 4210 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block, |
4129 | struct vm_gk20a *vm) | 4211 | struct vm_gk20a *vm) |
4130 | { | 4212 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 66e46480..184c1f71 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -307,6 +307,7 @@ int gk20a_init_mm_support(struct gk20a *g); | |||
307 | int gk20a_init_mm_setup_sw(struct gk20a *g); | 307 | int gk20a_init_mm_setup_sw(struct gk20a *g); |
308 | int gk20a_init_mm_setup_hw(struct gk20a *g); | 308 | int gk20a_init_mm_setup_hw(struct gk20a *g); |
309 | void gk20a_mm_debugfs_init(struct device *dev); | 309 | void gk20a_mm_debugfs_init(struct device *dev); |
310 | void gk20a_init_mm_ce_context(struct gk20a *g); | ||
310 | 311 | ||
311 | int gk20a_mm_fb_flush(struct gk20a *g); | 312 | int gk20a_mm_fb_flush(struct gk20a *g); |
312 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); | 313 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); |
@@ -349,6 +350,10 @@ struct mm_gk20a { | |||
349 | struct vm_gk20a vm; | 350 | struct vm_gk20a vm; |
350 | } cde; | 351 | } cde; |
351 | 352 | ||
353 | struct { | ||
354 | struct vm_gk20a vm; | ||
355 | } ce; | ||
356 | |||
352 | struct mutex l2_op_lock; | 357 | struct mutex l2_op_lock; |
353 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 358 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
354 | struct mem_desc bar2_desc; | 359 | struct mem_desc bar2_desc; |
@@ -388,6 +393,7 @@ struct mm_gk20a { | |||
388 | 393 | ||
389 | size_t vidmem_size; | 394 | size_t vidmem_size; |
390 | struct device vidmem_dev; | 395 | struct device vidmem_dev; |
396 | u32 ce_vidmem_ctx_id; | ||
391 | }; | 397 | }; |
392 | 398 | ||
393 | int gk20a_mm_init(struct mm_gk20a *mm); | 399 | int gk20a_mm_init(struct mm_gk20a *mm); |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index 543f9873..5bde3439 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -209,6 +209,8 @@ struct gk20a_platform { | |||
209 | 209 | ||
210 | bool has_cde; | 210 | bool has_cde; |
211 | 211 | ||
212 | bool has_ce; | ||
213 | |||
212 | /* soc name for finding firmware files */ | 214 | /* soc name for finding firmware files */ |
213 | const char *soc_name; | 215 | const char *soc_name; |
214 | 216 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index 2ed6df43..745d963c 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -900,6 +900,8 @@ struct gk20a_platform gk20a_tegra_platform = { | |||
900 | .secure_page_alloc = gk20a_tegra_secure_page_alloc, | 900 | .secure_page_alloc = gk20a_tegra_secure_page_alloc, |
901 | .dump_platform_dependencies = gk20a_tegra_debug_dump, | 901 | .dump_platform_dependencies = gk20a_tegra_debug_dump, |
902 | 902 | ||
903 | .has_ce = true, | ||
904 | |||
903 | .soc_name = "tegra12x", | 905 | .soc_name = "tegra12x", |
904 | 906 | ||
905 | .vidmem_is_vidmem = false, | 907 | .vidmem_is_vidmem = false, |
@@ -962,6 +964,8 @@ struct gk20a_platform gm20b_tegra_platform = { | |||
962 | 964 | ||
963 | .has_cde = true, | 965 | .has_cde = true, |
964 | 966 | ||
967 | .has_ce = true, | ||
968 | |||
965 | .soc_name = "tegra21x", | 969 | .soc_name = "tegra21x", |
966 | 970 | ||
967 | .vidmem_is_vidmem = false, | 971 | .vidmem_is_vidmem = false, |
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index ea6f3b4c..fcf63ddc 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c | |||
@@ -56,6 +56,8 @@ static struct gk20a_platform nvgpu_pci_device = { | |||
56 | 56 | ||
57 | .ch_wdt_timeout_ms = 7000, | 57 | .ch_wdt_timeout_ms = 7000, |
58 | .disable_bigpage = true, | 58 | .disable_bigpage = true, |
59 | |||
60 | .has_ce = true, | ||
59 | }; | 61 | }; |
60 | 62 | ||
61 | static struct pci_device_id nvgpu_pci_table[] = { | 63 | static struct pci_device_id nvgpu_pci_table[] = { |