diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/ce2.c | 160 |
1 file changed, 160 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/ce2.c b/drivers/gpu/nvgpu/common/ce2.c new file mode 100644 index 00000000..9385b531 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce2.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/types.h> | ||
24 | #include <nvgpu/channel.h> | ||
25 | |||
26 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
27 | |||
28 | #include "gk20a/ce2_gk20a.h" | ||
29 | #include "gk20a/gk20a.h" | ||
30 | |||
31 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | ||
32 | { | ||
33 | /* there is no local memory available, | ||
34 | don't allow local memory related CE flags */ | ||
35 | if (!g->mm.vidmem.size) { | ||
36 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
37 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
38 | } | ||
39 | return launch_flags; | ||
40 | } | ||
41 | |||
/*
 * Submit one copy-engine (CE) operation on an existing CE context.
 *
 * Looks up the context identified by @ce_ctx_id in the CE app's list of
 * allocated contexts, encodes the requested operation as a CE method
 * stream into one slot of the context's ring of command buffers, and
 * submits it to the context's channel as a single gpfifo entry.
 *
 * @ce_ctx_id:         id of a previously allocated CE context
 * @src_buf/@dst_buf:  GPU virtual addresses passed to the method encoder
 * @size:              byte count of the requested operation
 * @payload:           opaque value forwarded to gk20a_ce_prepare_submit()
 * @launch_flags:      NVGPU_CE_* flags; local-FB flags are masked out on
 *                     vidmem-less GPUs via gk20a_get_valid_launch_flags()
 * @request_operation: CE operation selector, forwarded to the encoder
 * @submit_flags:      gpfifo submit flags; FENCE_GET is always forced on
 * @gk20a_fence_out:   optional; if non-NULL, receives an extra reference
 *                     to this submission's post-fence (caller must put it)
 *
 * Returns 0 on success; -EPERM if the CE app is not active, -EINVAL for an
 * unknown context id, -ENODEV if the context is not in the ALLOCATED state,
 * -ENOMEM if the method stream could not be encoded, or the error code from
 * the previous-fence wait / gpfifo submission.
 */
int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 dma_copy_class;
	struct nvgpu_gpfifo_entry gpfifo;
	struct nvgpu_channel_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	/* Reject submissions unless the CE app is initialised and active. */
	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	/* Find the requested context under the app-wide lock. */
	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	/*
	 * NOTE(review): ce_ctx is dereferenced below after app_mutex has been
	 * dropped — this assumes a context cannot be freed concurrently with
	 * an in-flight submission; verify against the context teardown path.
	 */
	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	/* Wrap the slot index around the fixed-size ring of in-flight jobs. */
	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;

	/* Word (u32) offset of this slot within the command buffer memory. */
	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	/*
	 * If this slot was used by an earlier job, wait for its post-fence
	 * (so the engine is done with the buffer) and drop our reference
	 * before reusing the slot. On wait failure, bail out with that error.
	 */
	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
		struct gk20a_fence **prev_post_fence =
			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];

		ret = gk20a_fence_wait(g, *prev_post_fence,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(*prev_post_fence);
		*prev_post_fence = NULL;
		if (ret)
			goto noop;
	}

	/* GPU VA of the chosen slot, derived from the same word offset. */
	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));

	/* Encode the CE method stream for this operation into the slot. */
	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class);

	if (methodSize) {
		/* store the element into gpfifo */
		gpfifo.entry0 =
			u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 =
			(u64_hi32(cmd_buf_gpu_va) |
			pbdma_gp_entry1_length_f(methodSize));

		/* take always the postfence as it is needed for protecting the ce context */
		submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;

		/* Make the CPU-written methods visible before kicking off. */
		nvgpu_smp_wmb();

		ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
				1, submit_flags, &fence, &ce_cmd_buf_fence_out);

		if (!ret) {
			/*
			 * Park the post-fence on this slot; the reuse path
			 * above waits on it before recycling the buffer.
			 */
			ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
				ce_cmd_buf_fence_out;
			if (gk20a_fence_out) {
				/* Hand the caller its own fence reference. */
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* Next available command buffer queue Index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}