summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ce2_gk20a.c 53
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ce2_gk20a.h 7
2 files changed, 18 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index c4fcca3c..18878991 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -103,39 +103,15 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
103} 103}
104 104
105/* static CE app api */ 105/* static CE app api */
106static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx) 106static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
107{ 107{
108 u32 cmd_buf_index; 108 u32 i;
109 u32 cmd_buf_read_offset; 109
110 u32 fence_index; 110 for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
111 u32 *cmd_buf_cpu_va; 111 struct gk20a_fence **fence = &ce_ctx->postfences[i];
112 112 if (*fence)
113 for (cmd_buf_index = 0; 113 gk20a_fence_put(*fence);
114 cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset; 114 *fence = NULL;
115 cmd_buf_index++) {
116 cmd_buf_read_offset = (cmd_buf_index *
117 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
118
119 /* at end of command buffer has gk20a_fence for command buffer sync */
120 fence_index = (cmd_buf_read_offset +
121 ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
122 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
123
124 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
125
126 /* 0 is treated as invalid pre-sync */
127 if (cmd_buf_cpu_va[fence_index]) {
128 struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
129
130 memcpy((void *)&ce_cmd_buf_fence_in,
131 (void *)(cmd_buf_cpu_va + fence_index),
132 sizeof(struct gk20a_fence *));
133 gk20a_fence_put(ce_cmd_buf_fence_in);
134 /* Reset the stored last pre-sync */
135 memset((void *)(cmd_buf_cpu_va + fence_index),
136 0,
137 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
138 }
139 } 115 }
140} 116}
141 117
@@ -148,8 +124,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
148 124
149 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); 125 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
150 126
151 if (ce_ctx->cmd_buf_mem.cpu_va) { 127 if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
152 gk20a_ce_free_command_buffer_stored_fence(ce_ctx); 128 gk20a_ce_put_fences(ce_ctx);
153 nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); 129 nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
154 } 130 }
155 131
@@ -449,8 +425,6 @@ u32 gk20a_ce_create_context(struct gk20a *g,
449 ce_ctx->g = g; 425 ce_ctx->g = g;
450 426
451 ce_ctx->cmd_buf_read_queue_offset = 0; 427 ce_ctx->cmd_buf_read_queue_offset = 0;
452 ce_ctx->cmd_buf_end_queue_offset =
453 (NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
454 428
455 ce_ctx->vm = g->mm.ce.vm; 429 ce_ctx->vm = g->mm.ce.vm;
456 430
@@ -491,8 +465,11 @@ u32 gk20a_ce_create_context(struct gk20a *g,
491 goto end; 465 goto end;
492 } 466 }
493 467
494 /* allocate command buffer (4096 should be more than enough) from sysmem*/ 468 /* allocate command buffer from sysmem */
495 err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem); 469 err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
470 NVGPU_CE_MAX_INFLIGHT_JOBS *
471 NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
472 &ce_ctx->cmd_buf_mem);
496 if (err) { 473 if (err) {
497 nvgpu_err(g, 474 nvgpu_err(g,
498 "ce: could not allocate command buffer for CE context"); 475 "ce: could not allocate command buffer for CE context");
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 0b475f65..1a102070 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -36,9 +36,8 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
36#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff 36#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
37#define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff 37#define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
38 38
39#define NVGPU_CE_COMMAND_BUF_SIZE 8192 39#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
40#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 256 40#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256
41#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8
42 41
43/* dma launch_flags */ 42/* dma launch_flags */
44enum { 43enum {
@@ -106,11 +105,11 @@ struct gk20a_gpu_ctx {
106 105
107 /* cmd buf mem_desc */ 106 /* cmd buf mem_desc */
108 struct nvgpu_mem cmd_buf_mem; 107 struct nvgpu_mem cmd_buf_mem;
108 struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
109 109
110 struct nvgpu_list_node list; 110 struct nvgpu_list_node list;
111 111
112 u32 cmd_buf_read_queue_offset; 112 u32 cmd_buf_read_queue_offset;
113 u32 cmd_buf_end_queue_offset;
114}; 113};
115 114
116static inline struct gk20a_gpu_ctx * 115static inline struct gk20a_gpu_ctx *