diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-01-25 08:31:18 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-01-26 13:50:37 -0500 |
commit | 1a7484c901fe1abe0c35593ec96ff10e162099c4 (patch) | |
tree | da9b0cdb8c55dbf281884d126d6d957e61d8f16f /drivers/gpu/nvgpu/common/linux/ce2.c | |
parent | 91114cd6d4ca652cb726baf2329fa807442c68a8 (diff) |
gpu: nvgpu: ce: store fences in a separate array
Simplify the copy engine code massively by storing the job post fence
pointers in an array of fences instead of mixing them into the command
buffer memory. The post fences are used when the ring buffer of a
context gets full, and we need to wait for the oldest slot to free up.
NVGPU-43
NVGPU-52
Change-Id: I36969e19676bec0f38de9a6357767a8d5cbcd329
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1646037
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/ce2.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ce2.c | 41 |
1 files changed, 11 insertions, 30 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
index 97dc6678..7cb39382 100644
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -54,7 +54,6 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
54 | u64 cmd_buf_gpu_va = 0; | 54 | u64 cmd_buf_gpu_va = 0; |
55 | u32 methodSize; | 55 | u32 methodSize; |
56 | u32 cmd_buf_read_offset; | 56 | u32 cmd_buf_read_offset; |
57 | u32 fence_index; | ||
58 | u32 dma_copy_class; | 57 | u32 dma_copy_class; |
59 | struct nvgpu_gpfifo gpfifo; | 58 | struct nvgpu_gpfifo gpfifo; |
60 | struct nvgpu_fence fence = {0,0}; | 59 | struct nvgpu_fence fence = {0,0}; |
@@ -87,38 +86,22 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
87 | 86 | ||
88 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); | 87 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); |
89 | 88 | ||
90 | ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; | 89 | ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; |
91 | 90 | ||
92 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * | 91 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * |
93 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | 92 | (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32))); |
94 | |||
95 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
96 | fence_index = (cmd_buf_read_offset + | ||
97 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
98 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
99 | |||
100 | if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) { | ||
101 | ret = -ENOMEM; | ||
102 | goto noop; | ||
103 | } | ||
104 | 93 | ||
105 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | 94 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; |
106 | 95 | ||
107 | /* 0 is treated as invalid pre-sync */ | 96 | if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) { |
108 | if (cmd_buf_cpu_va[fence_index]) { | 97 | struct gk20a_fence **prev_post_fence = |
109 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | 98 | &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; |
110 | 99 | ||
111 | memcpy((void *)&ce_cmd_buf_fence_in, | 100 | ret = gk20a_fence_wait(g, *prev_post_fence, |
112 | (void *)(cmd_buf_cpu_va + fence_index), | ||
113 | sizeof(struct gk20a_fence *)); | ||
114 | ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in, | ||
115 | gk20a_get_gr_idle_timeout(g)); | 101 | gk20a_get_gr_idle_timeout(g)); |
116 | 102 | ||
117 | gk20a_fence_put(ce_cmd_buf_fence_in); | 103 | gk20a_fence_put(*prev_post_fence); |
118 | /* Reset the stored last pre-sync */ | 104 | *prev_post_fence = NULL; |
119 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
120 | 0, | ||
121 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
122 | if (ret) | 105 | if (ret) |
123 | goto noop; | 106 | goto noop; |
124 | } | 107 | } |
@@ -130,7 +113,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
130 | dst_buf, | 113 | dst_buf, |
131 | size, | 114 | size, |
132 | &cmd_buf_cpu_va[cmd_buf_read_offset], | 115 | &cmd_buf_cpu_va[cmd_buf_read_offset], |
133 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, | 116 | NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, |
134 | payload, | 117 | payload, |
135 | gk20a_get_valid_launch_flags(g, launch_flags), | 118 | gk20a_get_valid_launch_flags(g, launch_flags), |
136 | request_operation, | 119 | request_operation, |
@@ -154,10 +137,8 @@ int gk20a_ce_execute_ops(struct gk20a *g, | |||
154 | &ce_cmd_buf_fence_out, false, NULL); | 137 | &ce_cmd_buf_fence_out, false, NULL); |
155 | 138 | ||
156 | if (!ret) { | 139 | if (!ret) { |
157 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | 140 | ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = |
158 | (void *)&ce_cmd_buf_fence_out, | 141 | ce_cmd_buf_fence_out; |
159 | sizeof(struct gk20a_fence *)); | ||
160 | |||
161 | if (gk20a_fence_out) { | 142 | if (gk20a_fence_out) { |
162 | gk20a_fence_get(ce_cmd_buf_fence_out); | 143 | gk20a_fence_get(ce_cmd_buf_fence_out); |
163 | *gk20a_fence_out = ce_cmd_buf_fence_out; | 144 | *gk20a_fence_out = ce_cmd_buf_fence_out; |