diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.h')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/cde.h | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h new file mode 100644 index 00000000..5dd15c37 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/cde.h | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include <nvgpu/nvgpu_mem.h> | ||
23 | #include <nvgpu/list.h> | ||
24 | #include <nvgpu/lock.h> | ||
25 | |||
26 | #include <linux/kobject.h> | ||
27 | #include <linux/workqueue.h> | ||
28 | |||
29 | #define MAX_CDE_BUFS 10 | ||
30 | #define MAX_CDE_PARAMS 64 | ||
31 | #define MAX_CDE_USER_PARAMS 40 | ||
32 | #define MAX_CDE_ARRAY_ENTRIES 9 | ||
33 | |||
34 | /* | ||
35 | * The size of the context ring buffer that is dedicated for handling cde | ||
36 | * jobs. Re-using a context (=channel) for a different cde job forces a cpu | |
37 | * wait on the previous job to that channel, so increasing this value | ||
38 | * reduces the likelihood of stalls. | ||
39 | */ | ||
40 | #define NUM_CDE_CONTEXTS 4 | ||
41 | |||
42 | struct dma_buf; | ||
43 | struct device; | ||
44 | struct nvgpu_os_linux; | ||
45 | struct gk20a; | ||
46 | struct gk20a_fence; | ||
47 | struct nvgpu_fence; | ||
48 | struct channel_gk20a; | ||
49 | struct vm_gk20a; | ||
50 | struct nvgpu_gpfifo; | ||
51 | |||
52 | /* | ||
53 | * this element defines a buffer that is allocated and mapped into gpu address | ||
54 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
55 | * firmware. num_bytes defines how many bytes the firmware contains. | |
56 | * | ||
57 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
58 | */ | ||
59 | |||
60 | struct gk20a_cde_hdr_buf { /* header element describing one buffer that is allocated and mapped into GPU address space */ | |
61 | u64 data_byte_offset; /* start of the buffer contents inside the firmware; zero means an empty buffer is allocated */ | |
62 | u64 num_bytes; /* how many bytes the firmware contains for this buffer */ | |
63 | }; | |
64 | |||
65 | /* | ||
66 | * this element defines a constant patching in buffers. It basically | ||
67 | * computes physical address to <source_buf>+source_byte_offset. The | ||
68 | * address is then modified into patch value as per: | ||
69 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
70 | * | ||
71 | * The type field defines the register size as: | ||
72 | * 0=u32, | ||
73 | * 1=u64 (little endian), | ||
74 | * 2=u64 (big endian) | ||
75 | */ | ||
76 | |||
77 | struct gk20a_cde_hdr_replace { /* header element for a constant patch: an address derived from source_buf is merged into a value in a buffer */ | |
78 | u32 target_buf; /* presumably the index of the buffer that gets patched - TODO confirm against the loader */ | |
79 | u32 source_buf; /* buffer whose address, plus source_byte_offset, is computed */ | |
80 | s32 shift; /* shift applied to the address in the patch formula */ | |
81 | u32 type; /* register size: 0=u32, 1=u64 (little endian), 2=u64 (big endian) */ | |
82 | u64 target_byte_offset; /* presumably the location of the patched value inside target_buf - TODO confirm */ | |
83 | u64 source_byte_offset; /* byte offset added to the source buffer address */ | |
84 | u64 mask; /* selects the bits of the current value that are replaced by the shifted address */ | |
85 | }; | |
86 | |||
87 | enum { /* numeric values match the register sizes documented for the 'type' fields (0, 1, 2) */ | |
88 | TYPE_PARAM_TYPE_U32 = 0, /* 32-bit value */ | |
89 | TYPE_PARAM_TYPE_U64_LITTLE, /* 1: 64-bit value, little endian */ | |
90 | TYPE_PARAM_TYPE_U64_BIG /* 2: 64-bit value, big endian */ | |
91 | }; | |
92 | |||
93 | /* | ||
94 | * this element defines a runtime patching in buffers. Parameters with id from | ||
95 | * 0 to 1024 are reserved for special usage as follows: | ||
96 | * 0 = comptags_per_cacheline, | ||
97 | * 1 = slices_per_fbp, | ||
98 | * 2 = num_fbps | ||
99 | * 3 = source buffer first page offset | ||
100 | * 4 = source buffer block height log2 | ||
101 | * 5 = backing store memory address | ||
102 | * 6 = destination memory address | ||
103 | * 7 = destination size (bytes) | ||
104 | * 8 = backing store size (bytes) | ||
105 | * 9 = cache line size | ||
106 | * | ||
107 | * Parameters above id 1024 are user-specified, i.e. they determine where a | |
108 | * parameter from user space should be placed in buffers, what its type | |
109 | * is, etc. | |
110 | * | ||
111 | * Once the value is available, we add data_offset to the value. | ||
112 | * | ||
113 | * The value address is then modified into patch value as per: | ||
114 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
115 | * | ||
116 | * The type field defines the register size as: | ||
117 | * 0=u32, | ||
118 | * 1=u64 (little endian), | ||
119 | * 2=u64 (big endian) | ||
120 | */ | ||
121 | |||
122 | struct gk20a_cde_hdr_param { /* header element for a runtime patch: a parameter value is merged into a value in a buffer */ | |
123 | u32 id; /* parameter id; low ids are reserved (see NUM_RESERVED_PARAMS), higher ids are user-specified */ | |
124 | u32 target_buf; /* presumably the index of the buffer that gets patched - TODO confirm */ | |
125 | s32 shift; /* shift applied to the value in the patch formula */ | |
126 | u32 type; /* register size: 0=u32, 1=u64 (little endian), 2=u64 (big endian) */ | |
127 | s64 data_offset; /* signed offset added to the parameter value once it is available */ | |
128 | u64 target_byte_offset; /* presumably the location of the patched value inside target_buf - TODO confirm */ | |
129 | u64 mask; /* selects the bits of the current value that are replaced by the shifted value */ | |
130 | }; | |
131 | |||
132 | enum { /* reserved parameter ids. NOTE(review): the id list in the comment above struct gk20a_cde_hdr_param does not match this order - confirm which one is current */ | |
133 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | |
134 | TYPE_PARAM_GPU_CONFIGURATION, /* 1 */ | |
135 | TYPE_PARAM_FIRSTPAGEOFFSET, /* 2 */ | |
136 | TYPE_PARAM_NUMPAGES, /* 3 */ | |
137 | TYPE_PARAM_BACKINGSTORE, /* 4 */ | |
138 | TYPE_PARAM_DESTINATION, /* 5 */ | |
139 | TYPE_PARAM_DESTINATION_SIZE, /* 6 */ | |
140 | TYPE_PARAM_BACKINGSTORE_SIZE, /* 7 */ | |
141 | TYPE_PARAM_SOURCE_SMMU_ADDR, /* 8 */ | |
142 | TYPE_PARAM_BACKINGSTORE_BASE_HW, /* 9 */ | |
143 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, /* 10 */ | |
144 | TYPE_PARAM_SCATTERBUFFER, /* 11 */ | |
145 | TYPE_PARAM_SCATTERBUFFER_SIZE, /* 12 */ | |
146 | NUM_RESERVED_PARAMS = 1024, /* size of the reserved id range; presumably the first user parameter id - TODO confirm */ | |
147 | }; | |
148 | |||
149 | /* | ||
150 | * This header element defines a command. The op field determines whether the | ||
151 | * element is defining an init (0) or convert command (1). data_byte_offset | ||
152 | * denotes the beginning address of command elements in the file. | ||
153 | */ | ||
154 | |||
155 | struct gk20a_cde_hdr_command { /* header element defining an init or convert command */ | |
156 | u32 op; /* 0 = init command, 1 = convert command */ | |
157 | u32 num_entries; /* presumably the number of gk20a_cde_cmd_elem entries at data_byte_offset - TODO confirm */ | |
158 | u64 data_byte_offset; /* beginning address of the command elements in the file */ | |
159 | }; | |
160 | |||
161 | enum { /* numeric values match the documented 'op' values of struct gk20a_cde_hdr_command */ | |
162 | TYPE_BUF_COMMAND_INIT = 0, /* init command */ | |
163 | TYPE_BUF_COMMAND_CONVERT /* 1: convert command */ | |
164 | }; | |
165 | |||
166 | /* | ||
167 | * This command element defines one entry inside the push buffer. target_buf | |
168 | * defines the buffer including the pushbuffer entries, target_byte_offset the | ||
169 | * offset inside the buffer and num_bytes the number of words in the buffer. | ||
170 | */ | ||
171 | |||
172 | struct gk20a_cde_cmd_elem { /* one push buffer entry of a command */ | |
173 | u32 target_buf; /* buffer that includes the pushbuffer entries */ | |
174 | u32 padding; /* explicit padding; keeps the following u64 fields 8-byte aligned */ | |
175 | u64 target_byte_offset; /* offset inside target_buf */ | |
176 | u64 num_bytes; /* NOTE(review): named in bytes, but the comment above describes it as a number of words - confirm the unit */ | |
177 | }; | |
178 | |||
179 | /* | ||
180 | * This element is used for storing a small array of data. | ||
181 | */ | ||
182 | |||
183 | enum { /* identifiers of the small data arrays; also used to size gk20a_cde_app.arrays */ | |
184 | ARRAY_PROGRAM_OFFSET = 0, | |
185 | ARRAY_REGISTER_COUNT, /* 1 */ | |
186 | ARRAY_LAUNCH_COMMAND, /* 2 */ | |
187 | NUM_CDE_ARRAYS /* 3: count of array ids above, not an id itself */ | |
188 | }; | |
189 | |||
190 | struct gk20a_cde_hdr_array { /* header element storing a small array of data */ | |
191 | u32 id; /* presumably one of the ARRAY_* ids - TODO confirm against the loader */ | |
192 | u32 data[MAX_CDE_ARRAY_ENTRIES]; /* array payload, MAX_CDE_ARRAY_ENTRIES 32-bit entries */ | |
193 | }; | |
194 | |||
195 | /* | ||
196 | * Following defines a single header element. Each element has a type and | ||
197 | * some of the data structures. | ||
198 | */ | ||
199 | |||
200 | struct gk20a_cde_hdr_elem { /* a single header element: a type tag followed by a type-specific payload */ | |
201 | u32 type; /* presumably one of TYPE_BUF..TYPE_ARRAY, selecting the valid union member - TODO confirm */ | |
202 | u32 padding; /* explicit padding before the union */ | |
203 | union { /* anonymous union: members are accessed directly on the element */ | |
204 | struct gk20a_cde_hdr_buf buf; | |
205 | struct gk20a_cde_hdr_replace replace; | |
206 | struct gk20a_cde_hdr_param param; | |
207 | u32 required_class; | |
208 | struct gk20a_cde_hdr_command command; | |
209 | struct gk20a_cde_hdr_array array; | |
210 | }; | |
211 | }; | |
212 | |||
213 | enum { /* element type tags; the order matches the union members of struct gk20a_cde_hdr_elem */ | |
214 | TYPE_BUF = 0, /* buf */ | |
215 | TYPE_REPLACE, /* 1: replace */ | |
216 | TYPE_PARAM, /* 2: param */ | |
217 | TYPE_REQUIRED_CLASS, /* 3: required_class */ | |
218 | TYPE_COMMAND, /* 4: command */ | |
219 | TYPE_ARRAY /* 5: array */ | |
220 | }; | |
221 | |||
222 | struct gk20a_cde_param { /* id/value pair; an array of these is passed to gk20a_cde_convert() */ | |
223 | u32 id; /* presumably matched against gk20a_cde_hdr_param.id - TODO confirm */ | |
224 | u32 padding; /* explicit padding; keeps 'value' 8-byte aligned */ | |
225 | u64 value; /* parameter value */ | |
226 | }; | |
227 | |||
228 | struct gk20a_cde_ctx { /* state of one cde context (= channel) used for handling cde jobs */ | |
229 | struct nvgpu_os_linux *l; /* presumably the owning Linux OS-layer state - TODO confirm */ | |
230 | struct device *dev; | |
231 | |||
232 | /* channel related data */ | |
233 | struct channel_gk20a *ch; | |
234 | struct vm_gk20a *vm; | |
235 | |||
236 | /* buf converter configuration */ | |
237 | struct nvgpu_mem mem[MAX_CDE_BUFS]; | |
238 | unsigned int num_bufs; /* presumably the number of mem[] entries in use - TODO confirm */ | |
239 | |||
240 | /* buffer patching params (where should patching be done) */ | |
241 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | |
242 | unsigned int num_params; /* presumably the number of params[] entries in use - TODO confirm */ | |
243 | |||
244 | /* storage for user space parameter values */ | |
245 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | |
246 | |||
247 | u32 surf_param_offset; | |
248 | u32 surf_param_lines; | |
249 | u64 surf_vaddr; | |
250 | |||
251 | u64 compbit_vaddr; | |
252 | u64 compbit_size; | |
253 | |||
254 | u64 scatterbuffer_vaddr; | |
255 | u64 scatterbuffer_size; | |
256 | |||
257 | u64 backing_store_vaddr; | |
258 | |||
259 | struct nvgpu_gpfifo *init_convert_cmd; /* NOTE(review): named init_convert_cmd while its count below is init_cmd_num_entries - confirm they pair up */ | |
260 | int init_cmd_num_entries; | |
261 | |||
262 | struct nvgpu_gpfifo *convert_cmd; | |
263 | int convert_cmd_num_entries; | |
264 | |||
265 | struct kobj_attribute attr; | |
266 | |||
267 | bool init_cmd_executed; | |
268 | |||
269 | struct nvgpu_list_node list; /* list linkage; gk20a_cde_ctx_from_list() maps this node back to the context */ | |
270 | bool is_temporary; | |
271 | bool in_use; | |
272 | struct delayed_work ctx_deleter_work; /* presumably deferred deletion of the context - TODO confirm */ | |
273 | }; | |
274 | |||
275 | static inline struct gk20a_cde_ctx * /* returns the gk20a_cde_ctx that embeds 'node' as its 'list' member */ | |
276 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) /* node: must point at the 'list' field of a struct gk20a_cde_ctx */ | |
277 | { | |
278 | return (struct gk20a_cde_ctx *) /* step back from the member address to the start of the containing struct */ | |
279 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | |
280 | } | |
281 | |||
282 | struct gk20a_cde_app { /* cde state shared by all contexts */ | |
283 | bool initialised; | |
284 | struct nvgpu_mutex mutex; /* presumably protects the context lists and counters below - TODO confirm */ | |
285 | |||
286 | struct nvgpu_list_node free_contexts; /* list heads; presumably linked through gk20a_cde_ctx.list - TODO confirm */ | |
287 | struct nvgpu_list_node used_contexts; | |
288 | unsigned int ctx_count; | |
289 | unsigned int ctx_usecount; | |
290 | unsigned int ctx_count_top; | |
291 | |||
292 | u32 firmware_version; | |
293 | |||
294 | u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; /* one row of MAX_CDE_ARRAY_ENTRIES values per ARRAY_* id */ | |
295 | |||
296 | u32 shader_parameter; | |
297 | }; | |
298 | |||
299 | void gk20a_cde_destroy(struct nvgpu_os_linux *l); /* cde entry points; the definitions are not in this header */ | |
300 | void gk20a_cde_suspend(struct nvgpu_os_linux *l); | |
301 | int gk20a_init_cde_support(struct nvgpu_os_linux *l); /* NOTE(review): int return presumably 0 on success, negative errno on failure - confirm */ | |
302 | int gk20a_cde_reload(struct nvgpu_os_linux *l); | |
303 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | |
304 | struct dma_buf *compbits_buf, | |
305 | u64 compbits_byte_offset, | |
306 | u64 scatterbuffer_byte_offset, | |
307 | struct nvgpu_fence *fence, | |
308 | u32 __flags, struct gk20a_cde_param *params, /* NOTE(review): '__flags' is an identifier C reserves for the implementation; consider renaming */ | |
309 | int num_params, struct gk20a_fence **fence_out); /* num_params: presumably the length of params[]; fence_out: presumably receives the job fence - TODO confirm */ | |
310 | |||
311 | int gk20a_prepare_compressible_read( | |
312 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | |
313 | u64 compbits_hoffset, u64 compbits_voffset, | |
314 | u64 scatterbuffer_offset, | |
315 | u32 width, u32 height, u32 block_height_log2, | |
316 | u32 submit_flags, struct nvgpu_fence *fence, | |
317 | u32 *valid_compbits, u32 *zbc_color, /* pointer parameters; presumably outputs - TODO confirm */ | |
318 | struct gk20a_fence **fence_out); | |
319 | int gk20a_mark_compressible_write( | |
320 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, /* takes struct gk20a *, unlike the other entry points, which take struct nvgpu_os_linux * */ | |
321 | u32 zbc_color); | |
322 | |||
323 | #endif | ||