author     Konsta Holtta <kholtta@nvidia.com>                          2018-01-25 08:31:18 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>        2018-01-26 13:50:37 -0500
commit     1a7484c901fe1abe0c35593ec96ff10e162099c4 (patch)
tree       da9b0cdb8c55dbf281884d126d6d957e61d8f16f  /drivers/gpu/nvgpu/common/linux/ce2.c
parent     91114cd6d4ca652cb726baf2329fa807442c68a8 (diff)
gpu: nvgpu: ce: store fences in a separate array
Simplify the copyengine code massively by storing the job post fence
pointers in an array of fences instead of mixing them up in the command
buffer memory. The post fences are used when the ring buffer of a
context gets full and we need to wait for the oldest slot to free up.

NVGPU-43
NVGPU-52

Change-Id: I36969e19676bec0f38de9a6357767a8d5cbcd329
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1646037
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
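The core of the change is a per-context ring of fence pointers that parallels
the ring of command-buffer slots: before a slot is reused, the post fence left
behind by the job that last occupied it is waited on and released. A minimal
sketch of that pattern, reusing the NVGPU_CE_MAX_INFLIGHT_JOBS constant and
the gk20a_fence_wait()/gk20a_fence_put() calls visible in the diff below; the
helper name and the free-standing array are illustrative assumptions, not the
driver's actual definitions:

	/*
	 * Sketch only: one post fence per ring slot, parallel to the
	 * command buffer slots. A non-NULL entry means the job that last
	 * used the slot may still be in flight.
	 */
	struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];

	/* Illustrative helper: make slot `idx` safe to reuse. */
	static int ce_wait_for_slot(struct gk20a *g,
			struct gk20a_fence **postfences, u32 idx,
			u32 timeout_ms)
	{
		int ret = 0;

		if (postfences[idx]) {
			/* Block until the oldest job in this slot completes. */
			ret = gk20a_fence_wait(g, postfences[idx], timeout_ms);
			/* Drop the reference stored at submission time. */
			gk20a_fence_put(postfences[idx]);
			postfences[idx] = NULL;
		}
		return ret;
	}

Keeping the pointers host-side also removes the old scheme's need to memcpy()
a CPU pointer into GPU-visible command buffer memory and to reserve
NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING bytes at the tail of every slot
to hold it.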
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ce2.c | 41 +++++----------
1 file changed, 11 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
index 97dc6678..7cb39382 100644
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -54,7 +54,6 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 	u64 cmd_buf_gpu_va = 0;
 	u32 methodSize;
 	u32 cmd_buf_read_offset;
-	u32 fence_index;
 	u32 dma_copy_class;
 	struct nvgpu_gpfifo gpfifo;
 	struct nvgpu_fence fence = {0,0};
@@ -87,38 +86,22 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
 
-	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
+	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
 
 	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-	/* at end of command buffer has gk20a_fence for command buffer sync */
-	fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
-		ret = -ENOMEM;
-		goto noop;
-	}
+			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));
 
 	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
 
-	/* 0 is treated as invalid pre-sync */
-	if (cmd_buf_cpu_va[fence_index]) {
-		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
+	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
+		struct gk20a_fence **prev_post_fence =
+			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
 
-		memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
+		ret = gk20a_fence_wait(g, *prev_post_fence,
 				gk20a_get_gr_idle_timeout(g));
 
-		gk20a_fence_put(ce_cmd_buf_fence_in);
-		/* Reset the stored last pre-sync */
-		memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+		gk20a_fence_put(*prev_post_fence);
+		*prev_post_fence = NULL;
 		if (ret)
 			goto noop;
 	}
@@ -130,7 +113,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 			dst_buf,
 			size,
 			&cmd_buf_cpu_va[cmd_buf_read_offset],
-			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
+			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
 			payload,
 			gk20a_get_valid_launch_flags(g, launch_flags),
 			request_operation,
@@ -154,10 +137,8 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 			&ce_cmd_buf_fence_out, false, NULL);
 
 	if (!ret) {
-		memcpy((void *)(cmd_buf_cpu_va + fence_index),
-				(void *)&ce_cmd_buf_fence_out,
-				sizeof(struct gk20a_fence *));
-
+		ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
+			ce_cmd_buf_fence_out;
 		if (gk20a_fence_out) {
 			gk20a_fence_get(ce_cmd_buf_fence_out);
 			*gk20a_fence_out = ce_cmd_buf_fence_out;
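Because the array now holds a reference on each stored fence, any entries
still present when a CE context is torn down must be released as well. A
hypothetical cleanup loop under the same assumptions as the sketch above (the
context type name is assumed, and the real teardown path is outside this
file's diff):

	/* Illustrative teardown: drop every post fence the ring still holds. */
	static void ce_release_postfences(struct gk20a_gpu_ctx *ce_ctx)
	{
		u32 i;

		for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
			if (ce_ctx->postfences[i]) {
				gk20a_fence_put(ce_ctx->postfences[i]);
				ce_ctx->postfences[i] = NULL;
			}
		}
	}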