aboutsummaryrefslogtreecommitdiffstats
path: root/include/os/linux/cde.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/os/linux/cde.h')
-rw-r--r--include/os/linux/cde.h326
1 files changed, 326 insertions, 0 deletions
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h
new file mode 100644
index 0000000..5928b62
--- /dev/null
+++ b/include/os/linux/cde.h
@@ -0,0 +1,326 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#include <nvgpu/nvgpu_mem.h>
23#include <nvgpu/list.h>
24#include <nvgpu/lock.h>
25
26#include <linux/kobject.h>
27#include <linux/workqueue.h>
28
/*
 * Fixed capacities of the arrays declared further down in this header.
 */

/* Number of struct nvgpu_mem slots in gk20a_cde_ctx.mem[]. */
#define MAX_CDE_BUFS		10
/* Number of struct gk20a_cde_hdr_param slots in gk20a_cde_ctx.params[]. */
#define MAX_CDE_PARAMS		64
/* Number of u32 slots in gk20a_cde_ctx.user_param_values[]. */
#define MAX_CDE_USER_PARAMS	40
/* Number of u32 entries in gk20a_cde_hdr_array.data[] and per gk20a_cde_app.arrays[] row. */
#define MAX_CDE_ARRAY_ENTRIES	9
33
/*
 * The size of the context ring buffer that is dedicated for handling cde
 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
 * wait on the previous job to that channel, so increasing this value
 * reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS 4
41
/*
 * Types that this header only refers to through pointers; their full
 * definitions are not needed here.
 */
struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct gk20a_fence;
struct nvgpu_channel_fence;
struct channel_gk20a;
struct tsg_gk20a;	/* pointed to by gk20a_cde_ctx.tsg */
struct vm_gk20a;
struct nvgpu_gpfifo_entry;
51
52/*
53 * this element defines a buffer that is allocated and mapped into gpu address
54 * space. data_byte_offset defines the beginning of the buffer inside the
55 * firmare. num_bytes defines how many bytes the firmware contains.
56 *
57 * If data_byte_offset is zero, we allocate an empty buffer.
58 */
59
60struct gk20a_cde_hdr_buf {
61 u64 data_byte_offset;
62 u64 num_bytes;
63};
64
65/*
66 * this element defines a constant patching in buffers. It basically
67 * computes physical address to <source_buf>+source_byte_offset. The
68 * address is then modified into patch value as per:
69 * value = (current_value & ~mask) | (address << shift) & mask .
70 *
71 * The type field defines the register size as:
72 * 0=u32,
73 * 1=u64 (little endian),
74 * 2=u64 (big endian)
75 */
76
77struct gk20a_cde_hdr_replace {
78 u32 target_buf;
79 u32 source_buf;
80 s32 shift;
81 u32 type;
82 u64 target_byte_offset;
83 u64 source_byte_offset;
84 u64 mask;
85};
86
/*
 * Register sizes for the 'type' field of gk20a_cde_hdr_replace and
 * gk20a_cde_hdr_param.
 */
enum {
	TYPE_PARAM_TYPE_U32 = 0,	/* 32-bit value */
	TYPE_PARAM_TYPE_U64_LITTLE,	/* 64-bit value, little endian */
	TYPE_PARAM_TYPE_U64_BIG		/* 64-bit value, big endian */
};
92
93/*
94 * this element defines a runtime patching in buffers. Parameters with id from
95 * 0 to 1024 are reserved for special usage as follows:
96 * 0 = comptags_per_cacheline,
97 * 1 = slices_per_fbp,
98 * 2 = num_fbps
99 * 3 = source buffer first page offset
100 * 4 = source buffer block height log2
101 * 5 = backing store memory address
102 * 6 = destination memory address
103 * 7 = destination size (bytes)
104 * 8 = backing store size (bytes)
105 * 9 = cache line size
106 *
107 * Parameters above id 1024 are user-specified. I.e. they determine where a
108 * parameters from user space should be placed in buffers, what is their
109 * type, etc.
110 *
111 * Once the value is available, we add data_offset to the value.
112 *
113 * The value address is then modified into patch value as per:
114 * value = (current_value & ~mask) | (address << shift) & mask .
115 *
116 * The type field defines the register size as:
117 * 0=u32,
118 * 1=u64 (little endian),
119 * 2=u64 (big endian)
120 */
121
122struct gk20a_cde_hdr_param {
123 u32 id;
124 u32 target_buf;
125 s32 shift;
126 u32 type;
127 s64 data_offset;
128 u64 target_byte_offset;
129 u64 mask;
130};
131
/*
 * Reserved parameter ids for gk20a_cde_hdr_param.id. The numeric value of
 * each constant is spelled out because it is part of the header format.
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,		/* 0 */
	TYPE_PARAM_GPU_CONFIGURATION,			/* 1 */
	TYPE_PARAM_FIRSTPAGEOFFSET,			/* 2 */
	TYPE_PARAM_NUMPAGES,				/* 3 */
	TYPE_PARAM_BACKINGSTORE,			/* 4 */
	TYPE_PARAM_DESTINATION,				/* 5 */
	TYPE_PARAM_DESTINATION_SIZE,			/* 6 */
	TYPE_PARAM_BACKINGSTORE_SIZE,			/* 7 */
	TYPE_PARAM_SOURCE_SMMU_ADDR,			/* 8 */
	TYPE_PARAM_BACKINGSTORE_BASE_HW,		/* 9 */
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,	/* 10 */
	TYPE_PARAM_SCATTERBUFFER,			/* 11 */
	TYPE_PARAM_SCATTERBUFFER_SIZE,			/* 12 */
	/* Size of the reserved id range; not a parameter itself. */
	NUM_RESERVED_PARAMS = 1024,
};
148
149/*
150 * This header element defines a command. The op field determines whether the
151 * element is defining an init (0) or convert command (1). data_byte_offset
152 * denotes the beginning address of command elements in the file.
153 */
154
155struct gk20a_cde_hdr_command {
156 u32 op;
157 u32 num_entries;
158 u64 data_byte_offset;
159};
160
/* Values for gk20a_cde_hdr_command.op. */
enum {
	TYPE_BUF_COMMAND_INIT = 0,	/* init command */
	TYPE_BUF_COMMAND_CONVERT,	/* convert command */
	TYPE_BUF_COMMAND_NOOP
};
166
167/*
168 * This is a command element defines one entry inside push buffer. target_buf
169 * defines the buffer including the pushbuffer entries, target_byte_offset the
170 * offset inside the buffer and num_bytes the number of words in the buffer.
171 */
172
173struct gk20a_cde_cmd_elem {
174 u32 target_buf;
175 u32 padding;
176 u64 target_byte_offset;
177 u64 num_bytes;
178};
179
180/*
181 * This element is used for storing a small array of data.
182 */
183
/*
 * Identifiers of the small data arrays. NUM_CDE_ARRAYS is the number of
 * identifiers and sizes the first dimension of gk20a_cde_app.arrays.
 */
enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT,
	ARRAY_LAUNCH_COMMAND,
	NUM_CDE_ARRAYS
};
190
191struct gk20a_cde_hdr_array {
192 u32 id;
193 u32 data[MAX_CDE_ARRAY_ENTRIES];
194};
195
196/*
197 * Following defines a single header element. Each element has a type and
198 * some of the data structures.
199 */
200
201struct gk20a_cde_hdr_elem {
202 u32 type;
203 u32 padding;
204 union {
205 struct gk20a_cde_hdr_buf buf;
206 struct gk20a_cde_hdr_replace replace;
207 struct gk20a_cde_hdr_param param;
208 u32 required_class;
209 struct gk20a_cde_hdr_command command;
210 struct gk20a_cde_hdr_array array;
211 };
212};
213
/* Header element kinds; the names mirror the gk20a_cde_hdr_elem union members. */
enum {
	TYPE_BUF = 0,
	TYPE_REPLACE,
	TYPE_PARAM,
	TYPE_REQUIRED_CLASS,
	TYPE_COMMAND,
	TYPE_ARRAY
};
222
223struct gk20a_cde_param {
224 u32 id;
225 u32 padding;
226 u64 value;
227};
228
229struct gk20a_cde_ctx {
230 struct nvgpu_os_linux *l;
231 struct device *dev;
232
233 /* channel related data */
234 struct channel_gk20a *ch;
235 struct tsg_gk20a *tsg;
236 struct vm_gk20a *vm;
237
238 /* buf converter configuration */
239 struct nvgpu_mem mem[MAX_CDE_BUFS];
240 unsigned int num_bufs;
241
242 /* buffer patching params (where should patching be done) */
243 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
244 unsigned int num_params;
245
246 /* storage for user space parameter values */
247 u32 user_param_values[MAX_CDE_USER_PARAMS];
248
249 u32 surf_param_offset;
250 u32 surf_param_lines;
251 u64 surf_vaddr;
252
253 u64 compbit_vaddr;
254 u64 compbit_size;
255
256 u64 scatterbuffer_vaddr;
257 u64 scatterbuffer_size;
258
259 u64 backing_store_vaddr;
260
261 struct nvgpu_gpfifo_entry *init_convert_cmd;
262 int init_cmd_num_entries;
263
264 struct nvgpu_gpfifo_entry *convert_cmd;
265 int convert_cmd_num_entries;
266
267 struct kobj_attribute attr;
268
269 bool init_cmd_executed;
270
271 struct nvgpu_list_node list;
272 bool is_temporary;
273 bool in_use;
274 struct delayed_work ctx_deleter_work;
275};
276
277static inline struct gk20a_cde_ctx *
278gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
279{
280 return (struct gk20a_cde_ctx *)
281 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
282};
283
284struct gk20a_cde_app {
285 bool initialised;
286 struct nvgpu_mutex mutex;
287
288 struct nvgpu_list_node free_contexts;
289 struct nvgpu_list_node used_contexts;
290 unsigned int ctx_count;
291 unsigned int ctx_usecount;
292 unsigned int ctx_count_top;
293
294 u32 firmware_version;
295
296 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
297
298 u32 shader_parameter;
299};
300
301void gk20a_cde_destroy(struct nvgpu_os_linux *l);
302void gk20a_cde_suspend(struct nvgpu_os_linux *l);
303int gk20a_init_cde_support(struct nvgpu_os_linux *l);
304int gk20a_cde_reload(struct nvgpu_os_linux *l);
305int gk20a_cde_convert(struct nvgpu_os_linux *l,
306 struct dma_buf *compbits_buf,
307 u64 compbits_byte_offset,
308 u64 scatterbuffer_byte_offset,
309 struct nvgpu_channel_fence *fence,
310 u32 __flags, struct gk20a_cde_param *params,
311 int num_params, struct gk20a_fence **fence_out);
312
313int gk20a_prepare_compressible_read(
314 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
315 u64 compbits_hoffset, u64 compbits_voffset,
316 u64 scatterbuffer_offset,
317 u32 width, u32 height, u32 block_height_log2,
318 u32 submit_flags, struct nvgpu_channel_fence *fence,
319 u32 *valid_compbits, u32 *zbc_color,
320 struct gk20a_fence **fence_out);
321int gk20a_mark_compressible_write(
322 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
323 u32 zbc_color);
324int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
325
326#endif