summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/common/linux/ce2.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r-- drivers/gpu/nvgpu/common/linux/ce2.c | 41
1 file changed, 11 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
index 97dc6678..7cb39382 100644
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -54,7 +54,6 @@ int gk20a_ce_execute_ops(struct gk20a *g,
54 u64 cmd_buf_gpu_va = 0; 54 u64 cmd_buf_gpu_va = 0;
55 u32 methodSize; 55 u32 methodSize;
56 u32 cmd_buf_read_offset; 56 u32 cmd_buf_read_offset;
57 u32 fence_index;
58 u32 dma_copy_class; 57 u32 dma_copy_class;
59 struct nvgpu_gpfifo gpfifo; 58 struct nvgpu_gpfifo gpfifo;
60 struct nvgpu_fence fence = {0,0}; 59 struct nvgpu_fence fence = {0,0};
@@ -87,38 +86,22 @@ int gk20a_ce_execute_ops(struct gk20a *g,
87 86
88 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); 87 nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
89 88
90 ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; 89 ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
91 90
92 cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * 91 cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
93 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); 92 (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));
94
95 /* at end of command buffer has gk20a_fence for command buffer sync */
96 fence_index = (cmd_buf_read_offset +
97 ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
98 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
99
100 if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
101 ret = -ENOMEM;
102 goto noop;
103 }
104 93
105 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; 94 cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
106 95
107 /* 0 is treated as invalid pre-sync */ 96 if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
108 if (cmd_buf_cpu_va[fence_index]) { 97 struct gk20a_fence **prev_post_fence =
109 struct gk20a_fence * ce_cmd_buf_fence_in = NULL; 98 &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
110 99
111 memcpy((void *)&ce_cmd_buf_fence_in, 100 ret = gk20a_fence_wait(g, *prev_post_fence,
112 (void *)(cmd_buf_cpu_va + fence_index),
113 sizeof(struct gk20a_fence *));
114 ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
115 gk20a_get_gr_idle_timeout(g)); 101 gk20a_get_gr_idle_timeout(g));
116 102
117 gk20a_fence_put(ce_cmd_buf_fence_in); 103 gk20a_fence_put(*prev_post_fence);
118 /* Reset the stored last pre-sync */ 104 *prev_post_fence = NULL;
119 memset((void *)(cmd_buf_cpu_va + fence_index),
120 0,
121 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
122 if (ret) 105 if (ret)
123 goto noop; 106 goto noop;
124 } 107 }
@@ -130,7 +113,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
130 dst_buf, 113 dst_buf,
131 size, 114 size,
132 &cmd_buf_cpu_va[cmd_buf_read_offset], 115 &cmd_buf_cpu_va[cmd_buf_read_offset],
133 NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, 116 NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
134 payload, 117 payload,
135 gk20a_get_valid_launch_flags(g, launch_flags), 118 gk20a_get_valid_launch_flags(g, launch_flags),
136 request_operation, 119 request_operation,
@@ -154,10 +137,8 @@ int gk20a_ce_execute_ops(struct gk20a *g,
154 &ce_cmd_buf_fence_out, false, NULL); 137 &ce_cmd_buf_fence_out, false, NULL);
155 138
156 if (!ret) { 139 if (!ret) {
157 memcpy((void *)(cmd_buf_cpu_va + fence_index), 140 ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
158 (void *)&ce_cmd_buf_fence_out, 141 ce_cmd_buf_fence_out;
159 sizeof(struct gk20a_fence *));
160
161 if (gk20a_fence_out) { 142 if (gk20a_fence_out) {
162 gk20a_fence_get(ce_cmd_buf_fence_out); 143 gk20a_fence_get(ce_cmd_buf_fence_out);
163 *gk20a_fence_out = ce_cmd_buf_fence_out; 144 *gk20a_fence_out = ce_cmd_buf_fence_out;