path: root/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
author	Konsta Holtta <kholtta@nvidia.com>	2018-01-25 08:31:18 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-26 13:50:37 -0500
commit	1a7484c901fe1abe0c35593ec96ff10e162099c4 (patch)
tree	da9b0cdb8c55dbf281884d126d6d957e61d8f16f /drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
parent	91114cd6d4ca652cb726baf2329fa807442c68a8 (diff)
gpu: nvgpu: ce: store fences in a separate array
Simplify the copyengine code massively by storing the job post fence
pointers in an array of fences instead of mixing them up in the command
buffer memory. The post fences are used when the ring buffer of a
context gets full and we need to wait for the oldest slot to free up.

NVGPU-43
NVGPU-52

Change-Id: I36969e19676bec0f38de9a6357767a8d5cbcd329
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1646037
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
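As a rough illustration of the scheme the message describes, the submit path could keep each job's post fence in the slot that job occupies and, once the ring wraps around to an occupied slot, wait for and release that oldest fence before reusing it. The following is a minimal sketch, not code from this patch: the helper name, the out_fence/timeout parameters and the gk20a_fence_wait call are assumptions for illustration, while postfences, cmd_buf_read_queue_offset, NVGPU_CE_MAX_INFLIGHT_JOBS and gk20a_fence_put appear in the diff below.

/*
 * Sketch only: store the post fence of a newly submitted job in its ring
 * slot, waiting out and dropping the oldest fence if the slot is still busy.
 */
static int gk20a_ce_store_postfence_sketch(struct gk20a_gpu_ctx *ce_ctx,
		struct gk20a_fence *out_fence, unsigned long timeout)
{
	u32 slot = ce_ctx->cmd_buf_read_queue_offset % NVGPU_CE_MAX_INFLIGHT_JOBS;
	struct gk20a_fence **prev = &ce_ctx->postfences[slot];
	int err = 0;

	if (*prev) {
		/* ring is full at this slot: wait for the oldest job to finish */
		err = gk20a_fence_wait(ce_ctx->g, *prev, timeout);
		gk20a_fence_put(*prev);
		*prev = NULL;
		if (err)
			return err;
	}

	/* remember the post fence of the job that now owns this slot */
	*prev = out_fence;
	return 0;
}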
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ce2_gk20a.c	53
1 file changed, 15 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index c4fcca3c..18878991 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -103,39 +103,15 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 }
 
 /* static CE app api */
-static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
+static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
 {
-	u32 cmd_buf_index;
-	u32 cmd_buf_read_offset;
-	u32 fence_index;
-	u32 *cmd_buf_cpu_va;
-
-	for (cmd_buf_index = 0;
-		cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
-		cmd_buf_index++) {
-		cmd_buf_read_offset = (cmd_buf_index *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-		/* at end of command buffer has gk20a_fence for command buffer sync */
-		fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-		cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
-
-		/* 0 is treated as invalid pre-sync */
-		if (cmd_buf_cpu_va[fence_index]) {
-			struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
-
-			memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-			gk20a_fence_put(ce_cmd_buf_fence_in);
-			/* Reset the stored last pre-sync */
-			memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
-		}
+	u32 i;
+
+	for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
+		struct gk20a_fence **fence = &ce_ctx->postfences[i];
+		if (*fence)
+			gk20a_fence_put(*fence);
+		*fence = NULL;
 	}
 }
 
@@ -148,8 +124,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
 
 	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
 
-	if (ce_ctx->cmd_buf_mem.cpu_va) {
-		gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
+	if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
+		gk20a_ce_put_fences(ce_ctx);
 		nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
 	}
 
@@ -449,8 +425,6 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 	ce_ctx->g = g;
 
 	ce_ctx->cmd_buf_read_queue_offset = 0;
-	ce_ctx->cmd_buf_end_queue_offset =
-		(NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
 
 	ce_ctx->vm = g->mm.ce.vm;
 
@@ -491,8 +465,11 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 		goto end;
 	}
 
-	/* allocate command buffer (4096 should be more than enough) from sysmem*/
-	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem);
+	/* allocate command buffer from sysmem */
+	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
+			NVGPU_CE_MAX_INFLIGHT_JOBS *
+			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
+			&ce_ctx->cmd_buf_mem);
 	if (err) {
 		nvgpu_err(g,
 			"ce: could not allocate command buffer for CE context");