diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 53 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | 7 |
2 files changed, 18 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index c4fcca3c..18878991 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -103,39 +103,15 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) | |||
103 | } | 103 | } |
104 | 104 | ||
105 | /* static CE app api */ | 105 | /* static CE app api */ |
106 | static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx) | 106 | static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx) |
107 | { | 107 | { |
108 | u32 cmd_buf_index; | 108 | u32 i; |
109 | u32 cmd_buf_read_offset; | 109 | |
110 | u32 fence_index; | 110 | for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) { |
111 | u32 *cmd_buf_cpu_va; | 111 | struct gk20a_fence **fence = &ce_ctx->postfences[i]; |
112 | 112 | if (*fence) | |
113 | for (cmd_buf_index = 0; | 113 | gk20a_fence_put(*fence); |
114 | cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset; | 114 | *fence = NULL; |
115 | cmd_buf_index++) { | ||
116 | cmd_buf_read_offset = (cmd_buf_index * | ||
117 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | ||
118 | |||
119 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
120 | fence_index = (cmd_buf_read_offset + | ||
121 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
122 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
123 | |||
124 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | ||
125 | |||
126 | /* 0 is treated as invalid pre-sync */ | ||
127 | if (cmd_buf_cpu_va[fence_index]) { | ||
128 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | ||
129 | |||
130 | memcpy((void *)&ce_cmd_buf_fence_in, | ||
131 | (void *)(cmd_buf_cpu_va + fence_index), | ||
132 | sizeof(struct gk20a_fence *)); | ||
133 | gk20a_fence_put(ce_cmd_buf_fence_in); | ||
134 | /* Reset the stored last pre-sync */ | ||
135 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
136 | 0, | ||
137 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
138 | } | ||
139 | } | 115 | } |
140 | } | 116 | } |
141 | 117 | ||
@@ -148,8 +124,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) | |||
148 | 124 | ||
149 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); | 125 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); |
150 | 126 | ||
151 | if (ce_ctx->cmd_buf_mem.cpu_va) { | 127 | if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) { |
152 | gk20a_ce_free_command_buffer_stored_fence(ce_ctx); | 128 | gk20a_ce_put_fences(ce_ctx); |
153 | nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); | 129 | nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); |
154 | } | 130 | } |
155 | 131 | ||
@@ -449,8 +425,6 @@ u32 gk20a_ce_create_context(struct gk20a *g, | |||
449 | ce_ctx->g = g; | 425 | ce_ctx->g = g; |
450 | 426 | ||
451 | ce_ctx->cmd_buf_read_queue_offset = 0; | 427 | ce_ctx->cmd_buf_read_queue_offset = 0; |
452 | ce_ctx->cmd_buf_end_queue_offset = | ||
453 | (NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF); | ||
454 | 428 | ||
455 | ce_ctx->vm = g->mm.ce.vm; | 429 | ce_ctx->vm = g->mm.ce.vm; |
456 | 430 | ||
@@ -491,8 +465,11 @@ u32 gk20a_ce_create_context(struct gk20a *g, | |||
491 | goto end; | 465 | goto end; |
492 | } | 466 | } |
493 | 467 | ||
494 | /* allocate command buffer (4096 should be more than enough) from sysmem*/ | 468 | /* allocate command buffer from sysmem */ |
495 | err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem); | 469 | err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, |
470 | NVGPU_CE_MAX_INFLIGHT_JOBS * | ||
471 | NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, | ||
472 | &ce_ctx->cmd_buf_mem); | ||
496 | if (err) { | 473 | if (err) { |
497 | nvgpu_err(g, | 474 | nvgpu_err(g, |
498 | "ce: could not allocate command buffer for CE context"); | 475 | "ce: could not allocate command buffer for CE context"); |
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h index 0b475f65..1a102070 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h | |||
@@ -36,9 +36,8 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); | |||
36 | #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff | 36 | #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff |
37 | #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff | 37 | #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff |
38 | 38 | ||
39 | #define NVGPU_CE_COMMAND_BUF_SIZE 8192 | 39 | #define NVGPU_CE_MAX_INFLIGHT_JOBS 32 |
40 | #define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 256 | 40 | #define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256 |
41 | #define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8 | ||
42 | 41 | ||
43 | /* dma launch_flags */ | 42 | /* dma launch_flags */ |
44 | enum { | 43 | enum { |
@@ -106,11 +105,11 @@ struct gk20a_gpu_ctx { | |||
106 | 105 | ||
107 | /* cmd buf mem_desc */ | 106 | /* cmd buf mem_desc */ |
108 | struct nvgpu_mem cmd_buf_mem; | 107 | struct nvgpu_mem cmd_buf_mem; |
108 | struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS]; | ||
109 | 109 | ||
110 | struct nvgpu_list_node list; | 110 | struct nvgpu_list_node list; |
111 | 111 | ||
112 | u32 cmd_buf_read_queue_offset; | 112 | u32 cmd_buf_read_queue_offset; |
113 | u32 cmd_buf_end_queue_offset; | ||
114 | }; | 113 | }; |
115 | 114 | ||
116 | static inline struct gk20a_gpu_ctx * | 115 | static inline struct gk20a_gpu_ctx * |