author    Konsta Holtta <kholtta@nvidia.com>    2018-01-25 08:31:18 -0500
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-01-26 13:50:37 -0500
commit    1a7484c901fe1abe0c35593ec96ff10e162099c4 (patch)
tree      da9b0cdb8c55dbf281884d126d6d957e61d8f16f /drivers/gpu/nvgpu/gk20a
parent    91114cd6d4ca652cb726baf2329fa807442c68a8 (diff)
gpu: nvgpu: ce: store fences in a separate array
Simplify the copyengine code massively by storing the job post fence
pointers in an array of fences instead of mixing them up in the command
buffer memory. The post fences are used when the ring buffer of a
context gets full and we need to wait for the oldest slot to free up.

NVGPU-43
NVGPU-52

Change-Id: I36969e19676bec0f38de9a6357767a8d5cbcd329
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1646037
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
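For orientation, the consumer of the new array is the submission path
(gk20a_ce_execute_ops(), not shown in this diff): each in-flight job owns one
slot in postfences[], and when the ring wraps around, the oldest fence is
waited on and released before its slot is reused. The following is a minimal
sketch of that pattern only; the helper name, the timeout parameter, and the
exact gk20a_fence_wait() signature are assumptions made for illustration, not
code from this change.

/*
 * Illustrative sketch -- not part of this patch. Before reusing a ring
 * slot, wait for and drop the fence of the oldest job still occupying it.
 */
static int gk20a_ce_reclaim_slot(struct gk20a_gpu_ctx *ce_ctx, u32 slot,
                                 unsigned long timeout_ms)
{
        struct gk20a_fence **prev = &ce_ctx->postfences[slot];
        int err = 0;

        if (*prev) {
                /* Ring is full at this slot: block until the old job signals.
                 * gk20a_fence_wait() argument order is assumed here. */
                err = gk20a_fence_wait(ce_ctx->g, *prev, timeout_ms);
                gk20a_fence_put(*prev);
                *prev = NULL;
        }
        return err;
}

After kickoff, the job's post fence would be parked in the same slot
(ce_ctx->postfences[slot] = fence;); that stored reference is exactly what
gk20a_ce_put_fences() releases when the context is torn down.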
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c  53
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.h   7
2 files changed, 18 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index c4fcca3c..18878991 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -103,39 +103,15 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 }
 
 /* static CE app api */
-static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
+static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
 {
-	u32 cmd_buf_index;
-	u32 cmd_buf_read_offset;
-	u32 fence_index;
-	u32 *cmd_buf_cpu_va;
-
-	for (cmd_buf_index = 0;
-		cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
-		cmd_buf_index++) {
-		cmd_buf_read_offset = (cmd_buf_index *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-		/* at end of command buffer has gk20a_fence for command buffer sync */
-		fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-		cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
-
-		/* 0 is treated as invalid pre-sync */
-		if (cmd_buf_cpu_va[fence_index]) {
-			struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
-
-			memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-			gk20a_fence_put(ce_cmd_buf_fence_in);
-			/* Reset the stored last pre-sync */
-			memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
-		}
-	}
+	u32 i;
+
+	for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
+		struct gk20a_fence **fence = &ce_ctx->postfences[i];
+		if (*fence)
+			gk20a_fence_put(*fence);
+		*fence = NULL;
+	}
 }
 
@@ -148,8 +124,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
 
 	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
 
-	if (ce_ctx->cmd_buf_mem.cpu_va) {
-		gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
+	if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
+		gk20a_ce_put_fences(ce_ctx);
 		nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
 	}
 
@@ -449,8 +425,6 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 	ce_ctx->g = g;
 
 	ce_ctx->cmd_buf_read_queue_offset = 0;
-	ce_ctx->cmd_buf_end_queue_offset =
-		(NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
 
 	ce_ctx->vm = g->mm.ce.vm;
 
@@ -491,8 +465,11 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 		goto end;
 	}
 
-	/* allocate command buffer (4096 should be more than enough) from sysmem*/
-	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem);
+	/* allocate command buffer from sysmem */
+	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
+			NVGPU_CE_MAX_INFLIGHT_JOBS *
+			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
+			&ce_ctx->cmd_buf_mem);
 	if (err) {
 		nvgpu_err(g,
 			"ce: could not allocate command buffer for CE context");
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 0b475f65..1a102070 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -36,9 +36,8 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
 #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
 #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
 
-#define NVGPU_CE_COMMAND_BUF_SIZE 8192
-#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 256
-#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8
+#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
+#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256
 
 /* dma launch_flags */
 enum {
@@ -106,11 +105,11 @@ struct gk20a_gpu_ctx {
 
 	/* cmd buf mem_desc */
 	struct nvgpu_mem cmd_buf_mem;
+	struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
 
 	struct nvgpu_list_node list;
 
 	u32 cmd_buf_read_queue_offset;
-	u32 cmd_buf_end_queue_offset;
 };
 
 static inline struct gk20a_gpu_ctx *