Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ce2.c  41
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c   53
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.h    7
3 files changed, 29 insertions, 72 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
index 97dc6678..7cb39382 100644
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -54,7 +54,6 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 	u64 cmd_buf_gpu_va = 0;
 	u32 methodSize;
 	u32 cmd_buf_read_offset;
-	u32 fence_index;
 	u32 dma_copy_class;
 	struct nvgpu_gpfifo gpfifo;
 	struct nvgpu_fence fence = {0,0};
@@ -87,38 +86,22 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
 
-	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
+	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
 
 	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-	/* at end of command buffer has gk20a_fence for command buffer sync */
-	fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
-		ret = -ENOMEM;
-		goto noop;
-	}
+			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));
 
 	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
 
-	/* 0 is treated as invalid pre-sync */
-	if (cmd_buf_cpu_va[fence_index]) {
-		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
+	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
+		struct gk20a_fence **prev_post_fence =
+			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
 
-		memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
+		ret = gk20a_fence_wait(g, *prev_post_fence,
 				gk20a_get_gr_idle_timeout(g));
 
-		gk20a_fence_put(ce_cmd_buf_fence_in);
-		/* Reset the stored last pre-sync */
-		memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+		gk20a_fence_put(*prev_post_fence);
+		*prev_post_fence = NULL;
 		if (ret)
 			goto noop;
 	}
@@ -130,7 +113,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 			dst_buf,
 			size,
 			&cmd_buf_cpu_va[cmd_buf_read_offset],
-			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
+			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
 			payload,
 			gk20a_get_valid_launch_flags(g, launch_flags),
 			request_operation,
@@ -154,10 +137,8 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 			&ce_cmd_buf_fence_out, false, NULL);
 
 	if (!ret) {
-		memcpy((void *)(cmd_buf_cpu_va + fence_index),
-				(void *)&ce_cmd_buf_fence_out,
-				sizeof(struct gk20a_fence *));
-
+		ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
+			ce_cmd_buf_fence_out;
 		if (gk20a_fence_out) {
 			gk20a_fence_get(ce_cmd_buf_fence_out);
 			*gk20a_fence_out = ce_cmd_buf_fence_out;
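
Taken together, the ce2.c hunks drop the old trick of stashing a struct gk20a_fence pointer in the tail bytes of each job's command-buffer slot (the memcpy/memset dance around fence_index) in favor of a plain CPU-side postfences[] array indexed by the same ring slot. A minimal sketch of the resulting recycle-then-submit flow; the local "slot" variable is an illustrative rename, the real code operates on ce_ctx->cmd_buf_read_queue_offset directly:

	/* Sketch of per-slot fence recycling in gk20a_ce_execute_ops().
	 * Method-building and submission details are elided. */
	u32 slot = ce_ctx->cmd_buf_read_queue_offset % NVGPU_CE_MAX_INFLIGHT_JOBS;

	/* Before reusing a slot, wait for the job that last used it,
	 * then drop our reference to its post-fence. */
	if (ce_ctx->postfences[slot]) {
		ret = gk20a_fence_wait(g, ce_ctx->postfences[slot],
				gk20a_get_gr_idle_timeout(g));
		gk20a_fence_put(ce_ctx->postfences[slot]);
		ce_ctx->postfences[slot] = NULL;
		if (ret)
			goto noop;
	}

	/* ... build methods into the slot's 256-byte region and submit ... */

	/* On a successful submit, remember the new post-fence so the next
	 * user of this slot can synchronize against it. */
	ce_ctx->postfences[slot] = ce_cmd_buf_fence_out;
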
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index c4fcca3c..18878991 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -103,39 +103,15 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
 }
 
 /* static CE app api */
-static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
+static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
 {
-	u32 cmd_buf_index;
-	u32 cmd_buf_read_offset;
-	u32 fence_index;
-	u32 *cmd_buf_cpu_va;
-
-	for (cmd_buf_index = 0;
-		cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
-		cmd_buf_index++) {
-		cmd_buf_read_offset = (cmd_buf_index *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-		/* at end of command buffer has gk20a_fence for command buffer sync */
-		fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-		cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
-
-		/* 0 is treated as invalid pre-sync */
-		if (cmd_buf_cpu_va[fence_index]) {
-			struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
-
-			memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-			gk20a_fence_put(ce_cmd_buf_fence_in);
-			/* Reset the stored last pre-sync */
-			memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
-		}
+	u32 i;
+
+	for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
+		struct gk20a_fence **fence = &ce_ctx->postfences[i];
+		if (*fence)
+			gk20a_fence_put(*fence);
+		*fence = NULL;
 	}
 }
 
@@ -148,8 +124,8 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
 
 	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
 
-	if (ce_ctx->cmd_buf_mem.cpu_va) {
-		gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
+	if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
+		gk20a_ce_put_fences(ce_ctx);
 		nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
 	}
 
@@ -449,8 +425,6 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 	ce_ctx->g = g;
 
 	ce_ctx->cmd_buf_read_queue_offset = 0;
-	ce_ctx->cmd_buf_end_queue_offset =
-		(NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
 
 	ce_ctx->vm = g->mm.ce.vm;
 
@@ -491,8 +465,11 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 		goto end;
 	}
 
-	/* allocate command buffer (4096 should be more than enough) from sysmem*/
-	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem);
+	/* allocate command buffer from sysmem */
+	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
+			NVGPU_CE_MAX_INFLIGHT_JOBS *
+			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
+			&ce_ctx->cmd_buf_mem);
 	if (err) {
 		nvgpu_err(g,
 			"ce: could not allocate command buffer for CE context");
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 0b475f65..1a102070 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -36,9 +36,8 @@ int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base);
 #define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffff
 #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xff
 
-#define NVGPU_CE_COMMAND_BUF_SIZE 8192
-#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF 256
-#define NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING 8
+#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
+#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256
 
 /* dma launch_flags */
 enum {
@@ -106,11 +105,11 @@ struct gk20a_gpu_ctx {
 
 	/* cmd buf mem_desc */
 	struct nvgpu_mem cmd_buf_mem;
+	struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
 
 	struct nvgpu_list_node list;
 
 	u32 cmd_buf_read_queue_offset;
-	u32 cmd_buf_end_queue_offset;
 };
 
 static inline struct gk20a_gpu_ctx *
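
One way to sanity-check the header change: the removed NVGPU_CE_COMMAND_BUF_SIZE was 8192 bytes, and the new allocation in gk20a_ce_create_context() derives the same figure from the two surviving defines, so the total command-buffer footprint is unchanged. What does change is that the 8-byte NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING tail of each slot is no longer carved out for a stored fence pointer. A worked check of the arithmetic, assuming the values above:

	#define NVGPU_CE_MAX_INFLIGHT_JOBS 32
	#define NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF 256

	/* 32 slots * 256 bytes/slot = 8192 bytes: exactly the old
	 * NVGPU_CE_COMMAND_BUF_SIZE, so nvgpu_dma_alloc_map_sys() maps
	 * the same amount of sysmem as before, and every byte of each
	 * slot is now usable for copy-engine methods. */
	_Static_assert(NVGPU_CE_MAX_INFLIGHT_JOBS *
		       NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF == 8192,
		       "command buffer footprint unchanged");
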