drivers/gpu/nvgpu/os/linux/cde.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

/*
 * GK20A color decompression engine support
 *
 * Copyright (c) 2014-2017, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _CDE_GK20A_H_
#define _CDE_GK20A_H_

#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>

#include <linux/kobject.h>
#include <linux/workqueue.h>

/* Fixed capacities of the CDE tables declared later in this header. */
#define MAX_CDE_BUFS		10	/* entries in gk20a_cde_ctx.mem[] */
#define MAX_CDE_PARAMS		64	/* entries in gk20a_cde_ctx.params[] */
#define MAX_CDE_USER_PARAMS	40	/* entries in gk20a_cde_ctx.user_param_values[] */
#define MAX_CDE_ARRAY_ENTRIES	9	/* u32 words per gk20a_cde_hdr_array.data[] / gk20a_cde_app.arrays[] row */

/*
 * The size of the context ring buffer that is dedicated to handling cde
 * jobs.  Re-using a context (=channel) for a different cde job forces a cpu
 * wait on the previous job to that channel, so increasing this value
 * reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS	4

/*
 * Types that this header only refers to through pointers. They are declared
 * here as incomplete types; their definitions are not part of this file.
 */
struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct gk20a_fence;
struct nvgpu_channel_fence;
struct channel_gk20a;
struct tsg_gk20a;
struct vm_gk20a;
struct nvgpu_gpfifo_entry;

/*
 * This element defines a buffer that is allocated and mapped into gpu address
 * space. data_byte_offset defines the beginning of the buffer inside the
 * firmware. num_bytes defines how many bytes the firmware contains.
 *
 * If data_byte_offset is zero, we allocate an empty buffer.
 */

struct gk20a_cde_hdr_buf {
	u64 data_byte_offset;	/* start of the buffer data inside the firmware; 0 = empty buffer */
	u64 num_bytes;		/* number of bytes the firmware contains for this buffer */
};

/*
 * This element defines a constant patching in buffers. It basically
 * computes the physical address of <source_buf>+source_byte_offset. The
 * address is then modified into the patch value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask).
 *
 * The type field defines the register size as:
 *  0=u32,
 *  1=u64 (little endian),
 *  2=u64 (big endian)
 * These values match the TYPE_PARAM_TYPE_* constants.
 */

struct gk20a_cde_hdr_replace {
	u32 target_buf;		/* buffer to patch (presumably a buffer index -- confirm) */
	u32 source_buf;		/* buffer whose address is patched in */
	s32 shift;		/* shift applied to the address; signed, NOTE(review): meaning of negative values not visible here */
	u32 type;		/* register size, see the list above */
	u64 target_byte_offset;	/* NOTE(review): presumably the patch location inside target_buf */
	u64 source_byte_offset;	/* offset added to the source_buf address */
	u64 mask;		/* bits of the current value that are replaced */
};

/*
 * Register size selectors for the 'type' field of the patching elements:
 * one 32-bit word, or a 64-bit value in little or big endian order.
 */
enum {
	TYPE_PARAM_TYPE_U32		= 0,
	TYPE_PARAM_TYPE_U64_LITTLE	= 1,
	TYPE_PARAM_TYPE_U64_BIG		= 2,
};

/*
 * This element defines a runtime patching in buffers. Parameter ids in the
 * reserved range (bounded by NUM_RESERVED_PARAMS = 1024) are reserved for
 * special usage. The ids currently assigned mirror the TYPE_PARAM_*
 * constants:
 *   0  = comptags per cacheline
 *   1  = GPU configuration
 *   2  = source buffer first page offset
 *   3  = number of pages
 *   4  = backing store memory address
 *   5  = destination memory address
 *   6  = destination size (bytes)
 *   7  = backing store size (bytes)
 *   8  = source SMMU address
 *   9  = backing store base (HW)
 *   10 = gobs per comptagline per slice
 *   11 = scatter buffer
 *   12 = scatter buffer size
 *
 * NOTE(review): this list was realigned with the TYPE_PARAM_* enum, which the
 * previous text contradicted (it listed slices_per_fbp, num_fbps, block
 * height log2 and cache line size, none of which has an enum constant). The
 * descriptions are derived from the constant names; confirm them against the
 * implementation.
 *
 * Parameters beyond the reserved range are user-specified, i.e. they
 * determine where a parameter from user space should be placed in buffers,
 * what its type is, etc. NOTE(review): the original text said "above id
 * 1024"; confirm whether id 1024 itself is reserved or user-specified.
 *
 * Once the value is available, we add data_offset to the value.
 *
 * The value address is then modified into the patch value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask).
 *
 * The type field defines the register size as:
 *  0=u32,
 *  1=u64 (little endian),
 *  2=u64 (big endian)
 */

struct gk20a_cde_hdr_param {
	u32 id;			/* parameter id, reserved or user-specified (see above) */
	u32 target_buf;		/* buffer to patch (presumably a buffer index -- confirm) */
	s32 shift;		/* shift applied before masking; signed */
	u32 type;		/* register size, see the list above */
	s64 data_offset;	/* signed offset added to the parameter value */
	u64 target_byte_offset;	/* NOTE(review): presumably the patch location inside target_buf */
	u64 mask;		/* bits of the current value that are replaced */
};

/*
 * Ids of the reserved patching parameters. The numeric values are spelled
 * out because they are matched against the 'id' field of
 * struct gk20a_cde_hdr_param. NUM_RESERVED_PARAMS bounds the reserved id
 * range.
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE		= 0,
	TYPE_PARAM_GPU_CONFIGURATION			= 1,
	TYPE_PARAM_FIRSTPAGEOFFSET			= 2,
	TYPE_PARAM_NUMPAGES				= 3,
	TYPE_PARAM_BACKINGSTORE				= 4,
	TYPE_PARAM_DESTINATION				= 5,
	TYPE_PARAM_DESTINATION_SIZE			= 6,
	TYPE_PARAM_BACKINGSTORE_SIZE			= 7,
	TYPE_PARAM_SOURCE_SMMU_ADDR			= 8,
	TYPE_PARAM_BACKINGSTORE_BASE_HW			= 9,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE	= 10,
	TYPE_PARAM_SCATTERBUFFER			= 11,
	TYPE_PARAM_SCATTERBUFFER_SIZE			= 12,

	NUM_RESERVED_PARAMS				= 1024,
};

/*
 * This header element defines a command. The op field determines whether the
 * element is defining an init (0) or convert command (1); these values match
 * TYPE_BUF_COMMAND_INIT and TYPE_BUF_COMMAND_CONVERT. data_byte_offset
 * denotes the beginning address of command elements in the file.
 */

struct gk20a_cde_hdr_command {
	u32 op;			/* command kind, see above */
	u32 num_entries;	/* NOTE(review): presumably the number of command elements at data_byte_offset */
	u64 data_byte_offset;	/* where the command elements begin in the file */
};

/*
 * Command kinds. INIT (0) and CONVERT (1) are the values documented for
 * gk20a_cde_hdr_command.op; NOOP is the next value after them.
 */
enum {
	TYPE_BUF_COMMAND_INIT		= 0,
	TYPE_BUF_COMMAND_CONVERT	= 1,
	TYPE_BUF_COMMAND_NOOP		= 2,
};

/*
 * This command element defines one entry inside the push buffer. target_buf
 * defines the buffer including the pushbuffer entries, target_byte_offset the
 * offset inside the buffer and num_bytes the length of the entry.
 * NOTE(review): the original text described num_bytes as "the number of
 * words", which disagrees with the field name -- confirm the unit.
 */

struct gk20a_cde_cmd_elem {
	u32 target_buf;		/* buffer that holds the pushbuffer entries */
	u32 padding;		/* unnamed filler before the 64-bit fields */
	u64 target_byte_offset;	/* offset inside target_buf */
	u64 num_bytes;		/* length of the entry, see the note above */
};

/*
 * This element is used for storing a small array of data.
 */

/*
 * Ids of the small data arrays. NUM_CDE_ARRAYS is the number of ids and
 * sizes the first dimension of gk20a_cde_app.arrays, so it must stay last.
 */
enum {
	ARRAY_PROGRAM_OFFSET	= 0,
	ARRAY_REGISTER_COUNT	= 1,
	ARRAY_LAUNCH_COMMAND	= 2,

	NUM_CDE_ARRAYS		/* count of the ids above */
};

/* Header element payload holding one small, fixed-size array of data. */
struct gk20a_cde_hdr_array {
	u32 id;					/* NOTE(review): presumably one of the ARRAY_* ids -- confirm */
	u32 data[MAX_CDE_ARRAY_ENTRIES];	/* array contents */
};

/*
 * The following defines a single header element. Each element has a type
 * and carries one of the payload structures below in its union.
 * NOTE(review): the TYPE_* constants (TYPE_BUF, TYPE_REPLACE, ...) line up
 * one-to-one with the union members, so 'type' presumably selects the valid
 * member -- confirm against the parser.
 */

struct gk20a_cde_hdr_elem {
	u32 type;
	u32 padding;	/* unnamed filler before the union */
	union {
		struct gk20a_cde_hdr_buf buf;
		struct gk20a_cde_hdr_replace replace;
		struct gk20a_cde_hdr_param param;
		u32 required_class;
		struct gk20a_cde_hdr_command command;
		struct gk20a_cde_hdr_array array;
	};
};

/*
 * Header element kinds. There is one constant per payload member of
 * struct gk20a_cde_hdr_elem, in the same order as the union members.
 */
enum {
	TYPE_BUF		= 0,
	TYPE_REPLACE		= 1,
	TYPE_PARAM		= 2,
	TYPE_REQUIRED_CLASS	= 3,
	TYPE_COMMAND		= 4,
	TYPE_ARRAY		= 5,
};

/*
 * One id/value pair. gk20a_cde_convert() takes an array of these together
 * with a count (its params / num_params arguments).
 */
struct gk20a_cde_param {
	u32 id;		/* parameter id */
	u32 padding;	/* unnamed filler before the 64-bit value */
	u64 value;	/* parameter value */
};

/*
 * State of one CDE context: the channel-related pointers, the buffers and
 * patching parameters of the converter, the gpfifo command lists, and the
 * list linkage and bookkeeping flags.
 * NOTE(review): the comment on NUM_CDE_CONTEXTS equates a context with a
 * channel; how many instances exist at runtime is not visible in this header.
 */
struct gk20a_cde_ctx {
	struct nvgpu_os_linux *l;
	struct device *dev;

	/* channel related data */
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct vm_gk20a *vm;

	/* buf converter configuration */
	struct nvgpu_mem mem[MAX_CDE_BUFS];
	unsigned int num_bufs;		/* NOTE(review): presumably the used entries of mem[] */

	/* buffer patching params (where should patching be done) */
	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	unsigned int num_params;	/* NOTE(review): presumably the used entries of params[] */

	/* storage for user space parameter values */
	u32 user_param_values[MAX_CDE_USER_PARAMS];

	/* surface, compbit, scatter buffer and backing store values */
	u32 surf_param_offset;
	u32 surf_param_lines;
	u64 surf_vaddr;

	u64 compbit_vaddr;
	u64 compbit_size;

	u64 scatterbuffer_vaddr;
	u64 scatterbuffer_size;

	u64 backing_store_vaddr;

	/* gpfifo entries and entry counts of the init and convert commands */
	struct nvgpu_gpfifo_entry *init_convert_cmd;
	int init_cmd_num_entries;

	struct nvgpu_gpfifo_entry *convert_cmd;
	int convert_cmd_num_entries;

	struct kobj_attribute attr;

	bool init_cmd_executed;

	/* list linkage; gk20a_cde_ctx_from_list() maps it back to the context */
	struct nvgpu_list_node list;
	bool is_temporary;
	bool in_use;
	struct delayed_work ctx_deleter_work;
};

/*
 * Map a pointer to the 'list' member embedded in a struct gk20a_cde_ctx back
 * to the context that contains it.
 *
 * @node: address of the 'list' field of a struct gk20a_cde_ctx. Nothing is
 *        validated here; any other pointer produces a meaningless result.
 *
 * Returns a pointer to the enclosing struct gk20a_cde_ctx.
 */
static inline struct gk20a_cde_ctx *
gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
{
	/* Step back from the member's address by the member's offset. */
	return (struct gk20a_cde_ctx *)
		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
}

/*
 * CDE state that is not tied to a single context: the free and used context
 * lists with their counters, the firmware version, the small data arrays
 * and a shader parameter.
 */
struct gk20a_cde_app {
	bool initialised;
	struct nvgpu_mutex mutex;	/* NOTE(review): presumably guards the lists and counters below -- confirm */

	/* context list heads; NOTE(review): presumably linked through gk20a_cde_ctx.list */
	struct nvgpu_list_node free_contexts;
	struct nvgpu_list_node used_contexts;
	unsigned int ctx_count;
	unsigned int ctx_usecount;
	unsigned int ctx_count_top;

	u32 firmware_version;

	/* one row of MAX_CDE_ARRAY_ENTRIES words per ARRAY_* id */
	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];

	u32 shader_parameter;
};

/*
 * CDE entry points. Only the prototypes are visible in this header, so the
 * notes below are limited to what the signatures show.
 */
/* Destroy, suspend, init and reload entry points; each takes the nvgpu_os_linux instance. */
void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
/*
 * Takes a compbits dma_buf with two byte offsets, a channel fence, flags and
 * an array of num_params id/value pairs. Returns an int; fence_out is a
 * pointer-to-pointer argument (NOTE(review): presumably an output -- confirm).
 */
int gk20a_cde_convert(struct nvgpu_os_linux *l,
		struct dma_buf *compbits_buf,
		u64 compbits_byte_offset,
		u64 scatterbuffer_byte_offset,
		struct nvgpu_channel_fence *fence,
		u32 __flags, struct gk20a_cde_param *params,
		int num_params, struct gk20a_fence **fence_out);

/*
 * The buffer is identified by a file descriptor (buffer_fd). valid_compbits,
 * zbc_color and fence_out are pointer arguments (NOTE(review): presumably
 * outputs -- confirm).
 */
int gk20a_prepare_compressible_read(
		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
		u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_channel_fence *fence,
		u32 *valid_compbits, u32 *zbc_color,
		struct gk20a_fence **fence_out);
/* Unlike the other entry points, this one takes struct gk20a rather than nvgpu_os_linux. */
int gk20a_mark_compressible_write(
		struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
		u32 zbc_color);
int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);

#endif