summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/linux/cde.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.h')
-rw-r--r--drivers/gpu/nvgpu/common/linux/cde.h309
1 files changed, 309 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h
new file mode 100644
index 00000000..22732a2a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.h
@@ -0,0 +1,309 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
/* Fixed capacities of the arrays declared by the structures in this header. */
#define MAX_CDE_BUFS		10	/* gk20a_cde_ctx.mem[] */
#define MAX_CDE_PARAMS		64	/* gk20a_cde_ctx.params[] */
#define MAX_CDE_USER_PARAMS	40	/* gk20a_cde_ctx.user_param_values[] */
#define MAX_CDE_ARRAY_ENTRIES	9	/* u32 words per gk20a_cde_hdr_array */
26
/*
 * Number of slots in the context ring buffer that is dedicated to handling
 * cde jobs. Re-using a context (= channel) for a different cde job forces a
 * cpu wait on the previous job submitted to that channel, so a larger value
 * reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS 4
34
/*
 * Forward declarations for types this header only refers to through
 * pointers. Declaring the tags at file scope keeps the function prototypes
 * at the end of this header from introducing them with prototype scope
 * (which would make them incompatible with the real definitions) when the
 * defining headers have not been included first.
 */
struct dma_buf;
struct gk20a;
struct gk20a_fence;
struct nvgpu_fence;
struct nvgpu_os_linux;
37
38/*
39 * this element defines a buffer that is allocated and mapped into gpu address
40 * space. data_byte_offset defines the beginning of the buffer inside the
41 * firmare. num_bytes defines how many bytes the firmware contains.
42 *
43 * If data_byte_offset is zero, we allocate an empty buffer.
44 */
45
46struct gk20a_cde_hdr_buf {
47 u64 data_byte_offset;
48 u64 num_bytes;
49};
50
51/*
52 * this element defines a constant patching in buffers. It basically
53 * computes physical address to <source_buf>+source_byte_offset. The
54 * address is then modified into patch value as per:
55 * value = (current_value & ~mask) | (address << shift) & mask .
56 *
57 * The type field defines the register size as:
58 * 0=u32,
59 * 1=u64 (little endian),
60 * 2=u64 (big endian)
61 */
62
63struct gk20a_cde_hdr_replace {
64 u32 target_buf;
65 u32 source_buf;
66 s32 shift;
67 u32 type;
68 u64 target_byte_offset;
69 u64 source_byte_offset;
70 u64 mask;
71};
72
/* Register sizes used by the type field of the replace and param elements. */
enum {
	TYPE_PARAM_TYPE_U32        = 0,	/* u32 */
	TYPE_PARAM_TYPE_U64_LITTLE = 1,	/* u64, little endian */
	TYPE_PARAM_TYPE_U64_BIG    = 2,	/* u64, big endian */
};
78
79/*
80 * this element defines a runtime patching in buffers. Parameters with id from
81 * 0 to 1024 are reserved for special usage as follows:
82 * 0 = comptags_per_cacheline,
83 * 1 = slices_per_fbp,
84 * 2 = num_fbps
85 * 3 = source buffer first page offset
86 * 4 = source buffer block height log2
87 * 5 = backing store memory address
88 * 6 = destination memory address
89 * 7 = destination size (bytes)
90 * 8 = backing store size (bytes)
91 * 9 = cache line size
92 *
93 * Parameters above id 1024 are user-specified. I.e. they determine where a
94 * parameters from user space should be placed in buffers, what is their
95 * type, etc.
96 *
97 * Once the value is available, we add data_offset to the value.
98 *
99 * The value address is then modified into patch value as per:
100 * value = (current_value & ~mask) | (address << shift) & mask .
101 *
102 * The type field defines the register size as:
103 * 0=u32,
104 * 1=u64 (little endian),
105 * 2=u64 (big endian)
106 */
107
108struct gk20a_cde_hdr_param {
109 u32 id;
110 u32 target_buf;
111 s32 shift;
112 u32 type;
113 s64 data_offset;
114 u64 target_byte_offset;
115 u64 mask;
116};
117
/*
 * Reserved parameter ids for the id field of struct gk20a_cde_hdr_param.
 * NUM_RESERVED_PARAMS is not a parameter; presumably it marks the end of the
 * reserved id range (confirm against the code that uses it).
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE         = 0,
	TYPE_PARAM_GPU_CONFIGURATION              = 1,
	TYPE_PARAM_FIRSTPAGEOFFSET                = 2,
	TYPE_PARAM_NUMPAGES                       = 3,
	TYPE_PARAM_BACKINGSTORE                   = 4,
	TYPE_PARAM_DESTINATION                    = 5,
	TYPE_PARAM_DESTINATION_SIZE               = 6,
	TYPE_PARAM_BACKINGSTORE_SIZE              = 7,
	TYPE_PARAM_SOURCE_SMMU_ADDR               = 8,
	TYPE_PARAM_BACKINGSTORE_BASE_HW           = 9,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE = 10,
	TYPE_PARAM_SCATTERBUFFER                  = 11,
	TYPE_PARAM_SCATTERBUFFER_SIZE             = 12,
	NUM_RESERVED_PARAMS                       = 1024,
};
134
135/*
136 * This header element defines a command. The op field determines whether the
137 * element is defining an init (0) or convert command (1). data_byte_offset
138 * denotes the beginning address of command elements in the file.
139 */
140
141struct gk20a_cde_hdr_command {
142 u32 op;
143 u32 num_entries;
144 u64 data_byte_offset;
145};
146
/* Values of the op field of struct gk20a_cde_hdr_command. */
enum {
	TYPE_BUF_COMMAND_INIT    = 0,
	TYPE_BUF_COMMAND_CONVERT = 1,
};
151
152/*
153 * This is a command element defines one entry inside push buffer. target_buf
154 * defines the buffer including the pushbuffer entries, target_byte_offset the
155 * offset inside the buffer and num_bytes the number of words in the buffer.
156 */
157
158struct gk20a_cde_cmd_elem {
159 u32 target_buf;
160 u32 padding;
161 u64 target_byte_offset;
162 u64 num_bytes;
163};
164
/*
 * Identifiers of the small data arrays stored by the array header element
 * (struct gk20a_cde_hdr_array). NUM_CDE_ARRAYS is the number of identifiers
 * and is used as an array dimension in struct gk20a_cde_app.
 */
enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT = 1,
	ARRAY_LAUNCH_COMMAND = 2,
	NUM_CDE_ARRAYS
};
175
176struct gk20a_cde_hdr_array {
177 u32 id;
178 u32 data[MAX_CDE_ARRAY_ENTRIES];
179};
180
181/*
182 * Following defines a single header element. Each element has a type and
183 * some of the data structures.
184 */
185
186struct gk20a_cde_hdr_elem {
187 u32 type;
188 u32 padding;
189 union {
190 struct gk20a_cde_hdr_buf buf;
191 struct gk20a_cde_hdr_replace replace;
192 struct gk20a_cde_hdr_param param;
193 u32 required_class;
194 struct gk20a_cde_hdr_command command;
195 struct gk20a_cde_hdr_array array;
196 };
197};
198
/* Header element type tags (see the type field of gk20a_cde_hdr_elem). */
enum {
	TYPE_BUF            = 0,
	TYPE_REPLACE        = 1,
	TYPE_PARAM          = 2,
	TYPE_REQUIRED_CLASS = 3,
	TYPE_COMMAND        = 4,
	TYPE_ARRAY          = 5,
};
207
208struct gk20a_cde_param {
209 u32 id;
210 u32 padding;
211 u64 value;
212};
213
214struct gk20a_cde_ctx {
215 struct nvgpu_os_linux *l;
216 struct device *dev;
217
218 /* channel related data */
219 struct channel_gk20a *ch;
220 struct vm_gk20a *vm;
221
222 /* buf converter configuration */
223 struct nvgpu_mem mem[MAX_CDE_BUFS];
224 unsigned int num_bufs;
225
226 /* buffer patching params (where should patching be done) */
227 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
228 unsigned int num_params;
229
230 /* storage for user space parameter values */
231 u32 user_param_values[MAX_CDE_USER_PARAMS];
232
233 u32 surf_param_offset;
234 u32 surf_param_lines;
235 u64 surf_vaddr;
236
237 u64 compbit_vaddr;
238 u64 compbit_size;
239
240 u64 scatterbuffer_vaddr;
241 u64 scatterbuffer_size;
242
243 u64 backing_store_vaddr;
244
245 struct nvgpu_gpfifo *init_convert_cmd;
246 int init_cmd_num_entries;
247
248 struct nvgpu_gpfifo *convert_cmd;
249 int convert_cmd_num_entries;
250
251 struct kobj_attribute attr;
252
253 bool init_cmd_executed;
254
255 struct nvgpu_list_node list;
256 bool is_temporary;
257 bool in_use;
258 struct delayed_work ctx_deleter_work;
259};
260
261static inline struct gk20a_cde_ctx *
262gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
263{
264 return (struct gk20a_cde_ctx *)
265 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
266};
267
268struct gk20a_cde_app {
269 bool initialised;
270 struct nvgpu_mutex mutex;
271
272 struct nvgpu_list_node free_contexts;
273 struct nvgpu_list_node used_contexts;
274 unsigned int ctx_count;
275 unsigned int ctx_usecount;
276 unsigned int ctx_count_top;
277
278 u32 firmware_version;
279
280 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
281
282 u32 shader_parameter;
283};
284
285void gk20a_cde_destroy(struct nvgpu_os_linux *l);
286void gk20a_cde_suspend(struct nvgpu_os_linux *l);
287int gk20a_init_cde_support(struct nvgpu_os_linux *l);
288int gk20a_cde_reload(struct nvgpu_os_linux *l);
289int gk20a_cde_convert(struct nvgpu_os_linux *l,
290 struct dma_buf *compbits_buf,
291 u64 compbits_byte_offset,
292 u64 scatterbuffer_byte_offset,
293 struct nvgpu_fence *fence,
294 u32 __flags, struct gk20a_cde_param *params,
295 int num_params, struct gk20a_fence **fence_out);
296
297int gk20a_prepare_compressible_read(
298 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
299 u64 compbits_hoffset, u64 compbits_voffset,
300 u64 scatterbuffer_offset,
301 u32 width, u32 height, u32 block_height_log2,
302 u32 submit_flags, struct nvgpu_fence *fence,
303 u32 *valid_compbits, u32 *zbc_color,
304 struct gk20a_fence **fence_out);
305int gk20a_mark_compressible_write(
306 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
307 u32 zbc_color);
308
309#endif