diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ce2.c | 41 |
1 files changed, 11 insertions, 30 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c index 97dc6678..7cb39382 100644 --- a/drivers/gpu/nvgpu/common/linux/ce2.c +++ b/drivers/gpu/nvgpu/common/linux/ce2.c | |||
@@ -54,7 +54,6 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
54 | u64 cmd_buf_gpu_va = 0; | 54 | u64 cmd_buf_gpu_va = 0; |
55 | u32 methodSize; | 55 | u32 methodSize; |
56 | u32 cmd_buf_read_offset; | 56 | u32 cmd_buf_read_offset; |
57 | u32 fence_index; | ||
58 | u32 dma_copy_class; | 57 | u32 dma_copy_class; |
59 | struct nvgpu_gpfifo gpfifo; | 58 | struct nvgpu_gpfifo gpfifo; |
60 | struct nvgpu_fence fence = {0,0}; | 59 | struct nvgpu_fence fence = {0,0}; |
@@ -87,38 +86,22 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
87 | 86 | ||
88 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); | 87 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); |
89 | 88 | ||
90 | ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; | 89 | ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; |
91 | 90 | ||
92 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * | 91 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * |
93 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | 92 | (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); |
94 | |||
95 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
96 | fence_index = (cmd_buf_read_offset + | ||
97 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
98 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
99 | |||
100 | if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) { | ||
101 | ret = -ENOMEM; | ||
102 | goto noop; | ||
103 | } | ||
104 | 93 | ||
105 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | 94 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; |
106 | 95 | ||
107 | /* 0 is treated as invalid pre-sync */ | 96 | if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { |
108 | if (cmd_buf_cpu_va[fence_index]) { | 97 | struct gk20a_fence **prev_post_fence = |
109 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | 98 | &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; |
110 | 99 | ||
111 | memcpy((void *)&ce_cmd_buf_fence_in, | 100 | ret = gk20a_fence_wait(g, *prev_post_fence, |
112 | (void *)(cmd_buf_cpu_va + fence_index), | ||
113 | sizeof(struct gk20a_fence *)); | ||
114 | ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in, | ||
115 | gk20a_get_gr_idle_timeout(g)); | 101 | gk20a_get_gr_idle_timeout(g)); |
116 | 102 | ||
117 | gk20a_fence_put(ce_cmd_buf_fence_in); | 103 | gk20a_fence_put(*prev_post_fence); |
118 | /* Reset the stored last pre-sync */ | 104 | *prev_post_fence = NULL; |
119 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
120 | 0, | ||
121 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
122 | if (ret) | 105 | if (ret) |
123 | goto noop; | 106 | goto noop; |
124 | } | 107 | } |
@@ -130,7 +113,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
130 | dst_buf, | 113 | dst_buf, |
131 | size, | 114 | size, |
132 | &cmd_buf_cpu_va[cmd_buf_read_offset], | 115 | &cmd_buf_cpu_va[cmd_buf_read_offset], |
133 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, | 116 | NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, |
134 | payload, | 117 | payload, |
135 | gk20a_get_valid_launch_flags(g, launch_flags), | 118 | gk20a_get_valid_launch_flags(g, launch_flags), |
136 | request_operation, | 119 | request_operation, |
@@ -154,10 +137,8 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
154 | &ce_cmd_buf_fence_out, false, NULL); | 137 | &ce_cmd_buf_fence_out, false, NULL); |
155 | 138 | ||
156 | if (!ret) { | 139 | if (!ret) { |
157 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | 140 | ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = |
158 | (void *)&ce_cmd_buf_fence_out, | 141 | ce_cmd_buf_fence_out; |
159 | sizeof(struct gk20a_fence *)); | ||
160 | |||
161 | if (gk20a_fence_out) { | 142 | if (gk20a_fence_out) { |
162 | gk20a_fence_get(ce_cmd_buf_fence_out); | 143 | gk20a_fence_get(ce_cmd_buf_fence_out); |
163 | *gk20a_fence_out = ce_cmd_buf_fence_out; | 144 | *gk20a_fence_out = ce_cmd_buf_fence_out; |