path: root/drivers
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/cde_gk20a.c            |   4
-rw-r--r--   drivers/gpu/nvgpu/gk20a/ce2_gk20a.c            | 617
-rw-r--r--   drivers/gpu/nvgpu/gk20a/ce2_gk20a.h            | 124
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c        |   8
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.h        |   6
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.c           |  27
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.h           |   2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.c                |  13
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h                |   2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c             |  86
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.h             |   6
-rw-r--r--   drivers/gpu/nvgpu/gk20a/platform_gk20a.h       |   2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c |   4
-rw-r--r--   drivers/gpu/nvgpu/pci.c                        |   2
14 files changed, 896 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 4b84dc69..f5b68e72 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1186,7 +1186,9 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	}
 
 	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
-			cde_ctx);
+			cde_ctx,
+			-1,
+			false);
 	if (!ch) {
 		gk20a_warn(cde_ctx->dev, "cde: gk20a channel not available");
 		err = -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 96d38b11..e2f2d9e9 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -24,6 +24,7 @@
 #include <trace/events/gk20a.h>
 #include <linux/dma-mapping.h>
 #include <linux/nvhost.h>
+#include <linux/debugfs.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -96,3 +97,619 @@ void gk20a_init_ce2(struct gpu_ops *gops)
 	gops->ce2.isr_stall = gk20a_ce2_isr;
 	gops->ce2.isr_nonstall = gk20a_ce2_nonstall_isr;
 }
100
101/* static CE app api */
102static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event)
103{
104 struct gk20a_ce_app *ce_app = &g->ce_app;
105 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
106
107 if (!ce_app->initialised)
108 return;
109
110 mutex_lock(&ce_app->app_mutex);
111
112 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
113 &ce_app->allocated_contexts, list) {
114 if (ce_ctx->user_event_callback) {
115 ce_ctx->user_event_callback(ce_ctx->ctx_id,
116 event);
117 }
118 }
119
120 mutex_unlock(&ce_app->app_mutex);
121}
122
123static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
124{
125 struct gk20a_gpu_ctx *ce_ctx = data;
126 bool channel_idle;
127 u32 event;
128
129 mutex_lock(&ch->jobs_lock);
130 channel_idle = list_empty(&ch->jobs);
131 mutex_unlock(&ch->jobs_lock);
132
133 if (!channel_idle)
134 return;
135
136 gk20a_dbg(gpu_dbg_fn, "ce: finished %p", ce_ctx);
137
138 if (ch->has_timedout)
139 event = NVGPU_CE_CONTEXT_JOB_TIMEDOUT;
140 else
141 event = NVGPU_CE_CONTEXT_JOB_COMPLETED;
142
143 if (ce_ctx->user_event_callback)
144 ce_ctx->user_event_callback(ce_ctx->ctx_id,
145 event);
146
147 ++ce_ctx->completed_seq_number;
148}
149
150static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
151{
152 u32 cmd_buf_index;
153 u32 cmd_buf_read_offset;
154 u32 fence_index;
155 u32 *cmd_buf_cpu_va;
156
157 for (cmd_buf_index = 0;
158 cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
159 cmd_buf_index++) {
160 cmd_buf_read_offset = (cmd_buf_index *
161 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
162
163		/* the end of each command buffer slot holds a gk20a_fence pointer for command buffer sync */
164 fence_index = (cmd_buf_read_offset +
165 ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
166 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
167
168 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
169
170 /* 0 is treated as invalid pre-sync */
171 if (cmd_buf_cpu_va[fence_index]) {
172 struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
173
174 memcpy((void *)&ce_cmd_buf_fence_in,
175 (void *)(cmd_buf_cpu_va + fence_index),
176 sizeof(struct gk20a_fence *));
177 gk20a_fence_put(ce_cmd_buf_fence_in);
178 /* Reset the stored last pre-sync */
179 memset((void *)(cmd_buf_cpu_va + fence_index),
180 0,
181 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
182 }
183 }
184}
185
186/* this function must be called with ce_app->app_mutex held */
187static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
188{
189 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
190
191 mutex_lock(&ce_ctx->gpu_ctx_mutex);
192
193 gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
194
195 gk20a_gmmu_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
196
197 /* free the channel */
198 if (ce_ctx->ch)
199 gk20a_channel_close(ce_ctx->ch);
200
201 /* housekeeping on app */
202 list_del(&ce_ctx->list);
203
204 mutex_unlock(&ce_ctx->gpu_ctx_mutex);
205 mutex_destroy(&ce_ctx->gpu_ctx_mutex);
206
207 kfree(ce_ctx);
208}
209
210static inline int gk20a_ce_get_method_size(int request_operation)
211{
212 /* failure size */
213 int methodsize = ~0;
214
215 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER)
216 methodsize = 10 * 2 * sizeof(u32);
217 else if (request_operation & NVGPU_CE_MEMSET)
218 methodsize = 9 * 2 * sizeof(u32);
219
220 return methodsize;
221}
222
223static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
224{
225	/* if no local memory (vidmem) is available,
226	   don't allow local-memory-related CE flags */
227 if (!g->mm.vidmem_size) {
228 launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
229 NVGPU_CE_DST_LOCATION_LOCAL_FB);
230 }
231 return launch_flags;
232}
233
234static int gk20a_ce_prepare_submit(u64 src_buf,
235 u64 dst_buf,
236 u64 size,
237 u32 *cmd_buf_cpu_va,
238 u32 max_cmd_buf_size,
239 unsigned int payload,
240 int launch_flags,
241 int request_operation,
242 u32 dma_copy_class,
243 struct gk20a_fence *gk20a_fence_in)
244{
245 u32 launch = 0;
246 u32 methodSize = 0;
247
248 /* failure case handling */
249 if ((gk20a_ce_get_method_size(request_operation) > max_cmd_buf_size) ||
250 (!size) ||
251 (request_operation > NVGPU_CE_MEMSET))
252 return 0;
253
254 /* set the channel object */
255 cmd_buf_cpu_va[methodSize++] = 0x20018000;
256 cmd_buf_cpu_va[methodSize++] = dma_copy_class;
257
258 if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
259 /* setup the source */
260 cmd_buf_cpu_va[methodSize++] = 0x20018101;
261 cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf) &
262 NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
263
264 cmd_buf_cpu_va[methodSize++] = 0x20018100;
265 cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf) &
266 NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
267
268 cmd_buf_cpu_va[methodSize++] = 0x20018098;
269 if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
270 cmd_buf_cpu_va[methodSize++] = 0x00000000;
271 } else if (launch_flags & NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
272 cmd_buf_cpu_va[methodSize++] = 0x00000002;
273 } else {
274 cmd_buf_cpu_va[methodSize++] = 0x00000001;
275 }
276
277 launch |= 0x00001000;
278 } else if (request_operation & NVGPU_CE_MEMSET) {
279 cmd_buf_cpu_va[methodSize++] = 0x200181c2;
280 cmd_buf_cpu_va[methodSize++] = 0x00030004;
281
282 cmd_buf_cpu_va[methodSize++] = 0x200181c0;
283 cmd_buf_cpu_va[methodSize++] = payload;
284
285 launch |= 0x00000400;
286
287 /* converted into number of words */
288 size /= sizeof(u32);
289 }
290
291 /* setup the destination/output */
292 cmd_buf_cpu_va[methodSize++] = 0x20018103;
293 cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
294
295 cmd_buf_cpu_va[methodSize++] = 0x20018102;
296 cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
297
298 cmd_buf_cpu_va[methodSize++] = 0x20018099;
299 if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
300 cmd_buf_cpu_va[methodSize++] = 0x00000000;
301 } else if (launch_flags & NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
302 cmd_buf_cpu_va[methodSize++] = 0x00000002;
303 } else {
304 cmd_buf_cpu_va[methodSize++] = 0x00000001;
305 }
306
307 launch |= 0x00002000;
308
309 /* setup the format */
310 cmd_buf_cpu_va[methodSize++] = 0x20018107;
311 cmd_buf_cpu_va[methodSize++] = 1;
312 cmd_buf_cpu_va[methodSize++] = 0x20018106;
313 cmd_buf_cpu_va[methodSize++] = u64_lo32(size);
314
315 launch |= 0x00000004;
316
317 if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR)
318 launch |= 0x00000000;
319 else
320 launch |= 0x00000080;
321
322 if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR)
323 launch |= 0x00000000;
324 else
325 launch |= 0x00000100;
326
327 if (launch_flags & NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED)
328 launch |= 0x00000002;
329 else
330 launch |= 0x00000001;
331
332 cmd_buf_cpu_va[methodSize++] = 0x200180c0;
333 cmd_buf_cpu_va[methodSize++] = launch;
334
335 return methodSize;
336}
337
338/* global CE app related apis */
339int gk20a_init_ce_support(struct gk20a *g)
340{
341 struct gk20a_ce_app *ce_app = &g->ce_app;
342
343 if (ce_app->initialised) {
344		/* this can happen during the GPU poweroff/poweron sequence */
345 ce_app->app_state = NVGPU_CE_ACTIVE;
346 gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_RESUME);
347 return 0;
348 }
349
350 gk20a_dbg(gpu_dbg_fn, "ce: init");
351
352 mutex_init(&ce_app->app_mutex);
353 mutex_lock(&ce_app->app_mutex);
354
355 INIT_LIST_HEAD(&ce_app->allocated_contexts);
356 ce_app->ctx_count = 0;
357 ce_app->next_ctx_id = 0;
358 ce_app->initialised = true;
359 ce_app->app_state = NVGPU_CE_ACTIVE;
360
361 mutex_unlock(&ce_app->app_mutex);
362 gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished");
363
364 return 0;
365}
366
367void gk20a_ce_destroy(struct gk20a *g)
368{
369 struct gk20a_ce_app *ce_app = &g->ce_app;
370 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
371
372 if (!ce_app->initialised)
373 return;
374
375 ce_app->app_state = NVGPU_CE_SUSPEND;
376 ce_app->initialised = false;
377
378 mutex_lock(&ce_app->app_mutex);
379
380 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
381 &ce_app->allocated_contexts, list) {
382 gk20a_ce_delete_gpu_context(ce_ctx);
383 }
384
385 INIT_LIST_HEAD(&ce_app->allocated_contexts);
386 ce_app->ctx_count = 0;
387 ce_app->next_ctx_id = 0;
388
389 mutex_unlock(&ce_app->app_mutex);
390 mutex_destroy(&ce_app->app_mutex);
391}
392
393void gk20a_ce_suspend(struct gk20a *g)
394{
395 struct gk20a_ce_app *ce_app = &g->ce_app;
396
397 if (!ce_app->initialised)
398 return;
399
400 ce_app->app_state = NVGPU_CE_SUSPEND;
401 gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_SUSPEND);
402
403 return;
404}
405
406/* CE app utility functions */
407u32 gk20a_ce_create_context_with_cb(struct device *dev,
408 int runlist_id,
409 int priority,
410 int timeslice,
411 int runlist_level,
412 ce_event_callback user_event_callback)
413{
414 struct gk20a_gpu_ctx *ce_ctx;
415 struct gk20a *g = gk20a_from_dev(dev);
416 struct gk20a_ce_app *ce_app = &g->ce_app;
417 u32 ctx_id = ~0;
418 int err = 0;
419
420 if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
421 return ctx_id;
422
423 ce_ctx = kzalloc(sizeof(*ce_ctx), GFP_KERNEL);
424 if (!ce_ctx)
425 return ctx_id;
426
427 mutex_init(&ce_ctx->gpu_ctx_mutex);
428
429 ce_ctx->g = g;
430 ce_ctx->dev = g->dev;
431 ce_ctx->user_event_callback = user_event_callback;
432
433 ce_ctx->cmd_buf_read_queue_offset = 0;
434 ce_ctx->cmd_buf_end_queue_offset =
435 (NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
436
437 ce_ctx->submitted_seq_number = 0;
438 ce_ctx->completed_seq_number = 0;
439
440	/* kernel clients always need a privileged channel */
441 ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb,
442 ce_ctx,
443 runlist_id,
444 true);
445 if (!ce_ctx->ch) {
446 gk20a_err(ce_ctx->dev, "ce: gk20a channel not available");
447 goto end;
448 }
449
450 /* bind the channel to the vm */
451 gk20a_vm_get(&g->mm.ce.vm);
452 ce_ctx->vm = ce_ctx->ch->vm = &g->mm.ce.vm;
453 err = channel_gk20a_commit_va(ce_ctx->ch);
454 if (err) {
455 gk20a_err(ce_ctx->dev, "ce: could not bind vm");
456 goto end;
457 }
458
459 /* allocate gpfifo (1024 should be more than enough) */
460 err = gk20a_alloc_channel_gpfifo(ce_ctx->ch,
461 &(struct nvgpu_alloc_gpfifo_args){1024, 0});
462 if (err) {
463 gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo");
464 goto end;
465 }
466
467	/* allocate command buffer (4096 bytes should be more than enough) from sysmem */
468 err = gk20a_gmmu_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem);
469 if (err) {
470 gk20a_err(ce_ctx->dev,
471 "ce: could not allocate command buffer for CE context");
472 goto end;
473 }
474
475 memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);
476
477 /* -1 means default channel priority */
478 if (priority != -1) {
479 err = gk20a_channel_set_priority(ce_ctx->ch, priority);
480 if (err) {
481 gk20a_err(ce_ctx->dev,
482 "ce: could not set the channel priority for CE context");
483 goto end;
484 }
485 }
486
487 /* -1 means default channel timeslice value */
488 if (timeslice != -1) {
489 err = gk20a_channel_set_timeslice(ce_ctx->ch, timeslice);
490 if (err) {
491 gk20a_err(ce_ctx->dev,
492 "ce: could not set the channel timeslice value for CE context");
493 goto end;
494 }
495 }
496
497 /* -1 means default channel runlist level */
498 if (runlist_level != -1) {
499 err = gk20a_channel_set_runlist_interleave(ce_ctx->ch, runlist_level);
500 if (err) {
501 gk20a_err(ce_ctx->dev,
502 "ce: could not set the runlist interleave for CE context");
503 goto end;
504 }
505 }
506
507 mutex_lock(&ce_app->app_mutex);
508 ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
509 list_add(&ce_ctx->list, &ce_app->allocated_contexts);
510 ++ce_app->next_ctx_id;
511 ++ce_app->ctx_count;
512 mutex_unlock(&ce_app->app_mutex);
513
514 ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
515
516end:
517 if (ctx_id == ~0) {
518 mutex_lock(&ce_app->app_mutex);
519 gk20a_ce_delete_gpu_context(ce_ctx);
520 mutex_unlock(&ce_app->app_mutex);
521 }
522 return ctx_id;
523
524}
525EXPORT_SYMBOL(gk20a_ce_create_context_with_cb);
526
527int gk20a_ce_execute_ops(struct device *dev,
528 u32 ce_ctx_id,
529 u64 src_buf,
530 u64 dst_buf,
531 u64 size,
532 unsigned int payload,
533 int launch_flags,
534 int request_operation,
535 struct gk20a_fence *gk20a_fence_in,
536 u32 submit_flags,
537 struct gk20a_fence **gk20a_fence_out)
538{
539 int ret = -EPERM;
540 struct gk20a *g = gk20a_from_dev(dev);
541 struct gk20a_ce_app *ce_app = &g->ce_app;
542 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
543 bool found = false;
544 u32 *cmd_buf_cpu_va;
545 u64 cmd_buf_gpu_va = 0;
546 u32 methodSize;
547 u32 cmd_buf_read_offset;
548 u32 fence_index;
549 struct nvgpu_gpfifo gpfifo;
550 struct nvgpu_fence fence = {0,0};
551 struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
552 struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;
553
554	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
555 goto end;
556
557 mutex_lock(&ce_app->app_mutex);
558
559 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
560 &ce_app->allocated_contexts, list) {
561 if (ce_ctx->ctx_id == ce_ctx_id) {
562 found = true;
563 break;
564 }
565 }
566
567 mutex_unlock(&ce_app->app_mutex);
568
569 if (!found) {
570 ret = -EINVAL;
571 goto end;
572 }
573
574 if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
575 ret = -ENODEV;
576 goto end;
577 }
578
579 mutex_lock(&ce_ctx->gpu_ctx_mutex);
580
581 ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
582
583 cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
584 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
585
586	/* the end of each command buffer slot holds a gk20a_fence pointer for command buffer sync */
587 fence_index = (cmd_buf_read_offset +
588 ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
589 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
590
591 if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
592 ret = -ENOMEM;
593 goto noop;
594 }
595
596 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
597
598 /* 0 is treated as invalid pre-sync */
599 if (cmd_buf_cpu_va[fence_index]) {
600 struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
601
602 memcpy((void *)&ce_cmd_buf_fence_in,
603 (void *)(cmd_buf_cpu_va + fence_index),
604 sizeof(struct gk20a_fence *));
605 ret = gk20a_fence_wait(ce_cmd_buf_fence_in, gk20a_get_gr_idle_timeout(g));
606
607 gk20a_fence_put(ce_cmd_buf_fence_in);
608 /* Reset the stored last pre-sync */
609 memset((void *)(cmd_buf_cpu_va + fence_index),
610 0,
611 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
612 if (ret)
613 goto noop;
614 }
615
616 cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));
617
618 methodSize = gk20a_ce_prepare_submit(src_buf,
619 dst_buf,
620 size,
621 &cmd_buf_cpu_va[cmd_buf_read_offset],
622 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
623 payload,
624 gk20a_get_valid_launch_flags(g, launch_flags),
625 request_operation,
626 gpu_capability->dma_copy_class,
627 gk20a_fence_in);
628
629 if (methodSize) {
630 /* TODO: Remove CPU pre-fence wait */
631 if (gk20a_fence_in) {
632 ret = gk20a_fence_wait(gk20a_fence_in, gk20a_get_gr_idle_timeout(g));
633 gk20a_fence_put(gk20a_fence_in);
634 if (ret)
635 goto noop;
636 }
637
638 /* store the element into gpfifo */
639 gpfifo.entry0 =
640 u64_lo32(cmd_buf_gpu_va);
641 gpfifo.entry1 =
642 (u64_hi32(cmd_buf_gpu_va) |
643 pbdma_gp_entry1_length_f(methodSize));
644
645		/* always take the postfence, as it is needed to protect the CE context */
646 submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
647
648 wmb();
649
650 ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
651 1, submit_flags, &fence, &ce_cmd_buf_fence_out, true);
652
653 if (!ret) {
654 memcpy((void *)(cmd_buf_cpu_va + fence_index),
655 (void *)&ce_cmd_buf_fence_out,
656 sizeof(struct gk20a_fence *));
657
658 if (gk20a_fence_out) {
659 gk20a_fence_get(ce_cmd_buf_fence_out);
660 *gk20a_fence_out = ce_cmd_buf_fence_out;
661 }
662
663 /* Next available command buffer queue Index */
664 ++ce_ctx->cmd_buf_read_queue_offset;
665 ++ce_ctx->submitted_seq_number;
666 }
667 } else
668 ret = -ENOMEM;
669noop:
670 mutex_unlock(&ce_ctx->gpu_ctx_mutex);
671end:
672 return ret;
673}
674EXPORT_SYMBOL(gk20a_ce_execute_ops);
675
676void gk20a_ce_delete_context(struct device *dev,
677 u32 ce_ctx_id)
678{
679 struct gk20a *g = gk20a_from_dev(dev);
680 struct gk20a_ce_app *ce_app = &g->ce_app;
681 struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
682
683	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
684 return;
685
686 mutex_lock(&ce_app->app_mutex);
687
688 list_for_each_entry_safe(ce_ctx, ce_ctx_save,
689 &ce_app->allocated_contexts, list) {
690 if (ce_ctx->ctx_id == ce_ctx_id) {
691 gk20a_ce_delete_gpu_context(ce_ctx);
692 --ce_app->ctx_count;
693 break;
694 }
695 }
696
697 mutex_unlock(&ce_app->app_mutex);
698 return;
699}
700EXPORT_SYMBOL(gk20a_ce_delete_context);
701
702#ifdef CONFIG_DEBUG_FS
703void gk20a_ce_debugfs_init(struct device *dev)
704{
705 struct gk20a_platform *platform = dev_get_drvdata(dev);
706 struct gk20a *g = get_gk20a(dev);
707
708 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
709 platform->debugfs, &g->ce_app.ctx_count);
710 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
711 platform->debugfs, &g->ce_app.app_state);
712 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
713 platform->debugfs, &g->ce_app.next_ctx_id);
714}
715#endif
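
Note: the CE support added above manages its command buffer as a small ring. NVGPU_CE_COMMAND_BUF_SIZE (4096 bytes) is split into NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF (128-byte) slots, and the last NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING (8) bytes of each slot hold the gk20a_fence pointer stored by the previous submit that used the slot, which gk20a_ce_execute_ops() later waits on, puts, and clears. The standalone sketch below only illustrates that offset arithmetic; the helper name is hypothetical and the code is not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* values taken from ce2_gk20a.h in this patch */
#define CE_COMMAND_BUF_SIZE              4096u
#define CE_CMD_BUF_SIZE_PER_KICKOFF       128u
#define CE_CMD_BUF_SIZE_FOR_TRACING         8u

/* hypothetical helper: word offsets for a given ring slot, mirroring the
 * arithmetic in gk20a_ce_execute_ops() and
 * gk20a_ce_free_command_buffer_stored_fence() */
static void ce_slot_offsets(uint32_t slot, uint32_t *cmd_off, uint32_t *fence_off)
{
	uint32_t words_per_slot = CE_CMD_BUF_SIZE_PER_KICKOFF / sizeof(uint32_t);

	*cmd_off = slot * words_per_slot;
	/* the fence pointer lives in the last 8 bytes (2 words) of the slot */
	*fence_off = *cmd_off + words_per_slot -
		     CE_CMD_BUF_SIZE_FOR_TRACING / sizeof(uint32_t);
}

int main(void)
{
	uint32_t nr_slots = CE_COMMAND_BUF_SIZE / CE_CMD_BUF_SIZE_PER_KICKOFF;
	uint32_t cmd_off, fence_off;

	for (uint32_t slot = 0; slot < nr_slots; slot++) {
		ce_slot_offsets(slot, &cmd_off, &fence_off);
		printf("slot %2u: cmd word %4u, fence word %4u\n",
		       slot, cmd_off, fence_off);
	}
	return 0;
}

A zero value at the fence word is treated as "no stored pre-sync", which is why the driver memsets those bytes after putting the fence.
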
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 5ceb69e1..3b53834d 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -28,4 +28,128 @@ void gk20a_init_ce2(struct gpu_ops *gops);
 void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
 void gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
 
31/* CE command utility macros */
32#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
33#define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
34
35#define NVGPU_CE_COMMAND_BUF_SIZE 4096
36#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 128
37#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8
38
39typedef void (*ce_event_callback)(u32 ce_ctx_id, u32 ce_event_flag);
40
41/* dma launch_flags */
42enum {
43 /* location */
44 NVGPU_CE_SRC_LOCATION_COHERENT_SYSMEM = (1 << 0),
45 NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM = (1 << 1),
46 NVGPU_CE_SRC_LOCATION_LOCAL_FB = (1 << 2),
47 NVGPU_CE_DST_LOCATION_COHERENT_SYSMEM = (1 << 3),
48 NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM = (1 << 4),
49 NVGPU_CE_DST_LOCATION_LOCAL_FB = (1 << 5),
50
51 /* memory layout */
52 NVGPU_CE_SRC_MEMORY_LAYOUT_PITCH = (1 << 6),
53 NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 7),
54 NVGPU_CE_DST_MEMORY_LAYOUT_PITCH = (1 << 8),
55 NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR = (1 << 9),
56
57 /* transfer type */
58 NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED = (1 << 10),
59 NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED = (1 << 11),
60};
61
62/* CE operation mode */
63enum {
64 NVGPU_CE_PHYS_MODE_TRANSFER = (1 << 0),
65 NVGPU_CE_MEMSET = (1 << 1),
66};
67
68/* CE event flags */
69enum {
70 NVGPU_CE_CONTEXT_JOB_COMPLETED = (1 << 0),
71 NVGPU_CE_CONTEXT_JOB_TIMEDOUT = (1 << 1),
72 NVGPU_CE_CONTEXT_SUSPEND = (1 << 2),
73 NVGPU_CE_CONTEXT_RESUME = (1 << 3),
74};
75
76/* CE app state machine flags */
77enum {
78 NVGPU_CE_ACTIVE = (1 << 0),
79 NVGPU_CE_SUSPEND = (1 << 1),
80};
81
82/* gpu context state machine flags */
83enum {
84 NVGPU_CE_GPU_CTX_ALLOCATED = (1 << 0),
85 NVGPU_CE_GPU_CTX_DELETED = (1 << 1),
86};
87
88/* global ce app db */
89struct gk20a_ce_app {
90 bool initialised;
91 struct mutex app_mutex;
92 int app_state;
93
94 struct list_head allocated_contexts;
95 u32 ctx_count;
96 u32 next_ctx_id;
97};
98
99/* ce context db */
100struct gk20a_gpu_ctx {
101 struct gk20a *g;
102 struct device *dev;
103 u32 ctx_id;
104 struct mutex gpu_ctx_mutex;
105 int gpu_ctx_state;
106 ce_event_callback user_event_callback;
107
108 /* channel related data */
109 struct channel_gk20a *ch;
110 struct vm_gk20a *vm;
111
112 /* cmd buf mem_desc */
113 struct mem_desc cmd_buf_mem;
114
115 struct list_head list;
116
117 u64 submitted_seq_number;
118 u64 completed_seq_number;
119
120 u32 cmd_buf_read_queue_offset;
121 u32 cmd_buf_end_queue_offset;
122};
123
124/* global CE app related apis */
125int gk20a_init_ce_support(struct gk20a *g);
126void gk20a_ce_suspend(struct gk20a *g);
127void gk20a_ce_destroy(struct gk20a *g);
128
129/* CE app utility functions */
130u32 gk20a_ce_create_context_with_cb(struct device *dev,
131 int runlist_id,
132 int priority,
133 int timeslice,
134 int runlist_level,
135 ce_event_callback user_event_callback);
136int gk20a_ce_execute_ops(struct device *dev,
137 u32 ce_ctx_id,
138 u64 src_buf,
139 u64 dst_buf,
140 u64 size,
141 unsigned int payload,
142 int launch_flags,
143 int request_operation,
144 struct gk20a_fence *gk20a_fence_in,
145 u32 submit_flags,
146 struct gk20a_fence **gk20a_fence_out);
147void gk20a_ce_delete_context(struct device *dev,
148 u32 ce_ctx_id);
149
150#ifdef CONFIG_DEBUG_FS
151/* CE app debugfs api */
152void gk20a_ce_debugfs_init(struct device *dev);
153#endif
154
 #endif /*__CE2_GK20A_H__*/
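
Note: taken together, the declarations above suggest the intended kernel-internal flow: create a CE context on the fast copy-engine runlist, submit memset or copy operations that hand back a gk20a_fence, and delete the context on teardown. The vidmem page-clearing path added to mm_gk20a.c below follows exactly this pattern with NVGPU_CE_MEMSET. The sketch below shows a hypothetical caller using the physical-mode copy path instead; the function name, buffer addresses and error handling are placeholders and not part of the patch, and the addresses are assumed to be physical/IOVA addresses of the kind mm_gk20a.c obtains via g->ops.mm.get_iova_addr().

/*
 * Sketch only. Assumes the nvgpu headers below and a valid GPU
 * struct device *dev; src_pa/dst_pa are physical (IOVA) addresses,
 * as used by the NVGPU_CE_PHYS_MODE_TRANSFER path.
 */
#include "gk20a.h"
#include "ce2_gk20a.h"
#include "fifo_gk20a.h"

static int example_ce_copy(struct device *dev, u64 src_pa, u64 dst_pa, u64 size)
{
	struct gk20a *g = gk20a_from_dev(dev);
	struct gk20a_fence *fence_out = NULL;
	u32 ctx_id;
	int err;

	/* one CE context per client; -1 keeps the default priority,
	 * timeslice and runlist interleave level */
	ctx_id = gk20a_ce_create_context_with_cb(dev,
			gk20a_fifo_get_fast_ce_runlist_id(g),
			-1, -1, -1, NULL);
	if (ctx_id == (u32)~0)
		return -ENOMEM;

	/* pipelined physical-mode copy, with a postfence returned */
	err = gk20a_ce_execute_ops(dev, ctx_id,
			src_pa, dst_pa, size,
			0x00000000,	/* payload: only used for memset */
			NVGPU_CE_DATA_TRANSFER_TYPE_PIPELINED,
			NVGPU_CE_PHYS_MODE_TRANSFER,
			NULL, 0, &fence_out);
	if (!err && fence_out) {
		err = gk20a_fence_wait(fence_out, gk20a_get_gr_idle_timeout(g));
		gk20a_fence_put(fence_out);
	}

	gk20a_ce_delete_context(dev, ctx_id);
	return err;
}
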
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d5457d10..447fe86a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -702,7 +702,7 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 	return 0;
 }
 
-static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
 		u32 level)
 {
 	struct gk20a *g = ch->g;
@@ -1113,9 +1113,11 @@ static void gk20a_channel_update_runcb_fn(struct work_struct *work)
 
 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 		void (*update_fn)(struct channel_gk20a *, void *),
-		void *update_fn_data)
+		void *update_fn_data,
+		int runlist_id,
+		bool is_privileged_channel)
 {
-	struct channel_gk20a *ch = gk20a_open_new_channel(g, -1, false);
+	struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
 
 	if (ch) {
 		spin_lock(&ch->update_fn_lock);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4b5fe1b3..971175f2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -265,7 +265,9 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 		bool is_privileged_channel);
 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 		void (*update_fn)(struct channel_gk20a *, void *),
-		void *update_fn_data);
+		void *update_fn_data,
+		int runlist_id,
+		bool is_privileged_channel);
 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
@@ -295,6 +297,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 		int *__timeslice_timeout, int *__timeslice_scale);
 int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority);
 int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice);
+int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+		u32 level);
 void gk20a_channel_event_id_post_event(struct channel_gk20a *ch,
 		int event_id);
 
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 5133f86a..3dd7cb02 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -165,6 +165,33 @@ u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
 	return reset_mask;
 }
 
+u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g)
+{
+	u32 ce_runlist_id = gk20a_fifo_get_gr_runlist_id(g);
+	u32 engine_enum = ENGINE_INVAL_GK20A;
+	struct fifo_gk20a *f = NULL;
+	u32 engine_id_idx;
+	struct fifo_engine_info_gk20a *engine_info;
+	u32 active_engine_id = 0;
+
+	if (!g)
+		return ce_runlist_id;
+
+	f = &g->fifo;
+
+	for (engine_id_idx = 0; engine_id_idx < f->num_engines; ++engine_id_idx) {
+		active_engine_id = f->active_engines_list[engine_id_idx];
+		engine_info = &f->engine_info[active_engine_id];
+		engine_enum = engine_info->engine_enum;
+
+		/* select the last available ASYNC_CE engine, if any */
+		if (engine_enum == ENGINE_ASYNC_CE_GK20A)
+			ce_runlist_id = engine_info->runlist_id;
+	}
+
+	return ce_runlist_id;
+}
+
 u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
 {
 	u32 gr_engine_cnt = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 3473bc78..33d6d39c 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -244,6 +244,8 @@ u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g);
 
 u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g);
 
+u32 gk20a_fifo_get_fast_ce_runlist_id(struct gk20a *g);
+
 u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g);
 
 bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 50f67262..04f82033 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -773,6 +773,7 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 	int ret = 0;
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	gk20a_dbg_fn("");
 
@@ -786,6 +787,9 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 	/* cancel any pending cde work */
 	gk20a_cde_suspend(g);
 
+	if (platform->has_ce)
+		gk20a_ce_suspend(g);
+
 	ret = gk20a_channel_suspend(g);
 	if (ret)
 		goto done;
@@ -996,6 +1000,11 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	if (platform->has_cde)
 		gk20a_init_cde_support(g);
 
+	if (platform->has_ce)
+		gk20a_init_ce_support(g);
+
+	gk20a_init_mm_ce_context(g);
+
 	enable_irq(g->irq_stall);
 	if (g->irq_stall != g->irq_nonstall)
 		enable_irq(g->irq_nonstall);
@@ -1658,6 +1667,7 @@ static int gk20a_probe(struct platform_device *dev)
 	gk20a_pmu_debugfs_init(&dev->dev);
 	gk20a_railgating_debugfs_init(&dev->dev);
 	gk20a_cde_debugfs_init(&dev->dev);
+	gk20a_ce_debugfs_init(&dev->dev);
 	gk20a_alloc_debugfs_init(dev);
 	gk20a_mm_debugfs_init(&dev->dev);
 	gk20a_fifo_debugfs_init(&dev->dev);
@@ -1693,6 +1703,9 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 	if (g->remove_support)
 		g->remove_support(dev);
 
+	if (platform->has_ce)
+		gk20a_ce_destroy(g);
+
 	gk20a_user_deinit(dev, &nvgpu_class);
 
 	debugfs_remove_recursive(platform->debugfs);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8aa8689b..03a698dc 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -864,6 +864,8 @@ struct gk20a {
 
 	struct nvgpu_bios bios;
 	struct debugfs_blob_wrapper bios_blob;
+
+	struct gk20a_ce_app ce_app;
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 750ce10c..7b2174bc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -393,7 +393,7 @@ static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-
+static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
 
 struct gk20a_dmabuf_priv {
 	struct mutex lock;
@@ -702,6 +702,7 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block)
 static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 {
 	struct gk20a *g = gk20a_from_mm(mm);
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 
 	if (g->ops.mm.remove_bar2_vm)
 		g->ops.mm.remove_bar2_vm(g);
@@ -709,6 +710,14 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 	gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
 	gk20a_vm_remove_support_nofree(&mm->cde.vm);
+
+	if (mm->ce_vidmem_ctx_id != ~0)
+		gk20a_ce_delete_context(g->dev, mm->ce_vidmem_ctx_id);
+
+	mm->ce_vidmem_ctx_id = ~0;
+
+	if (platform->has_ce)
+		gk20a_vm_remove_support_nofree(&mm->ce.vm);
 }
 
 static int gk20a_alloc_sysmem_flush(struct gk20a *g)
@@ -754,6 +763,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	int err;
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 
 	gk20a_dbg_fn("");
 
@@ -775,6 +785,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 
 	gk20a_init_pramin(mm);
 
+	mm->ce_vidmem_ctx_id = ~0;
+
 	err = gk20a_init_vidmem(mm);
 	if (err)
 		return err;
@@ -804,6 +816,12 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	if (err)
 		return err;
 
+	if (platform->has_ce) {
+		err = gk20a_init_ce_vm(mm);
+		if (err)
+			return err;
+	}
+
 	/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
 	g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
 	mm->remove_support = gk20a_remove_mm_support;
@@ -881,6 +899,25 @@ int gk20a_init_mm_support(struct gk20a *g)
 	return err;
 }
 
+void gk20a_init_mm_ce_context(struct gk20a *g)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	if (g->mm.vidmem_size && (g->mm.ce_vidmem_ctx_id == ~0)) {
+		g->mm.ce_vidmem_ctx_id =
+			gk20a_ce_create_context_with_cb(g->dev,
+				gk20a_fifo_get_fast_ce_runlist_id(g),
+				-1,
+				-1,
+				-1,
+				NULL);
+
+		if (g->mm.ce_vidmem_ctx_id == ~0)
+			gk20a_err(g->dev,
+				"Failed to allocate CE context for vidmem page clearing support");
+	}
+#endif
+}
+
 static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
 			struct gk20a_mm_entry *entry)
 {
@@ -2484,6 +2521,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 	struct device *d = &g->mm.vidmem_dev;
 	int err;
 	dma_addr_t iova;
+	bool need_pramin_access = true;
 	DEFINE_DMA_ATTRS(attrs);
 
 	gk20a_dbg_fn("");
@@ -2519,7 +2557,38 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr,
 	mem->size = size;
 	mem->aperture = APERTURE_VIDMEM;
 
-	gk20a_memset(g, mem, 0, 0, size);
+	if (g->mm.ce_vidmem_ctx_id != ~0) {
+		struct gk20a_fence *gk20a_fence_out = NULL;
+		u64 dst_bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0);
+
+		err = gk20a_ce_execute_ops(g->dev,
+				g->mm.ce_vidmem_ctx_id,
+				0,
+				dst_bufbase,
+				(u64)size,
+				0x00000000,
+				NVGPU_CE_DST_LOCATION_LOCAL_FB,
+				NVGPU_CE_MEMSET,
+				NULL,
+				0,
+				&gk20a_fence_out);
+
+		if (!err) {
+			if (gk20a_fence_out) {
+				err = gk20a_fence_wait(gk20a_fence_out, gk20a_get_gr_idle_timeout(g));
+				gk20a_fence_put(gk20a_fence_out);
+				if (err)
+					gk20a_err(g->dev,
+						"Failed to get the fence_out from CE execute ops");
+				else
+					need_pramin_access = false;
+			}
+		} else
+			gk20a_err(g->dev, "Failed gk20a_ce_execute_ops[%d]", err);
+	}
+
+	if (need_pramin_access)
+		gk20a_memset(g, mem, 0, 0, size);
 
 	gk20a_dbg_fn("done");
 
@@ -4125,6 +4194,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 			false, false, "cde");
 }
 
+static int gk20a_init_ce_vm(struct mm_gk20a *mm)
+{
+	struct vm_gk20a *vm = &mm->ce.vm;
+	struct gk20a *g = gk20a_from_mm(mm);
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+
+	return gk20a_init_vm(mm, vm, big_page_size,
+			SZ_4K * 16,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "ce");
+}
+
 void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *inst_block,
 		struct vm_gk20a *vm)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 66e46480..184c1f71 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -307,6 +307,7 @@ int gk20a_init_mm_support(struct gk20a *g);
 int gk20a_init_mm_setup_sw(struct gk20a *g);
 int gk20a_init_mm_setup_hw(struct gk20a *g);
 void gk20a_mm_debugfs_init(struct device *dev);
+void gk20a_init_mm_ce_context(struct gk20a *g);
 
 int gk20a_mm_fb_flush(struct gk20a *g);
 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
@@ -349,6 +350,10 @@ struct mm_gk20a {
 		struct vm_gk20a vm;
 	} cde;
 
+	struct {
+		struct vm_gk20a vm;
+	} ce;
+
 	struct mutex l2_op_lock;
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	struct mem_desc bar2_desc;
@@ -388,6 +393,7 @@ struct mm_gk20a {
 
 	size_t vidmem_size;
 	struct device vidmem_dev;
+	u32 ce_vidmem_ctx_id;
 };
 
 int gk20a_mm_init(struct mm_gk20a *mm);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index 543f9873..5bde3439 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -209,6 +209,8 @@ struct gk20a_platform {
 
 	bool has_cde;
 
+	bool has_ce;
+
 	/* soc name for finding firmware files */
 	const char *soc_name;
 
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 2ed6df43..745d963c 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -900,6 +900,8 @@ struct gk20a_platform gk20a_tegra_platform = {
 	.secure_page_alloc = gk20a_tegra_secure_page_alloc,
 	.dump_platform_dependencies = gk20a_tegra_debug_dump,
 
+	.has_ce = true,
+
 	.soc_name = "tegra12x",
 
 	.vidmem_is_vidmem = false,
@@ -962,6 +964,8 @@ struct gk20a_platform gm20b_tegra_platform = {
 
 	.has_cde = true,
 
+	.has_ce = true,
+
 	.soc_name = "tegra21x",
 
 	.vidmem_is_vidmem = false,
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index ea6f3b4c..fcf63ddc 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -56,6 +56,8 @@ static struct gk20a_platform nvgpu_pci_device = {
 
 	.ch_wdt_timeout_ms = 7000,
 	.disable_bigpage = true,
+
+	.has_ce = true,
 };
 
 static struct pci_device_id nvgpu_pci_table[] = {