summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
diff options
context:
space:
mode:
authorArto Merilainen <amerilainen@nvidia.com>2014-07-21 03:21:09 -0400
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:10:41 -0400
commitb3e023a8055d4346b30924a03a99286926e76a15 (patch)
tree8e798c4d6ad8335616df558ec820fb71608c2980 /drivers/gpu/nvgpu/gk20a/cde_gk20a.h
parentc60a300c4ac903dd7e0b53f2542a081fa4c334cb (diff)
gpu: nvgpu: CDE support
This patch adds support for executing a precompiled GPU program to allow exporting GPU buffers to other graphics units that have color decompression engine (CDE) support. Bug 1409151 Change-Id: Id0c930923f2449b85a6555de71d7ec93eed238ae Signed-off-by: Arto Merilainen <amerilainen@nvidia.com> Reviewed-on: http://git-master/r/360418 Reviewed-by: Lauri Peltonen <lpeltonen@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.h')
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.h254
1 files changed, 254 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
new file mode 100644
index 00000000..784ae8b4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -0,0 +1,254 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#include "mm_gk20a.h"
23
/* Capacity limits for the fixed-size arrays in struct gk20a_cde_ctx. */
#define MAX_CDE_BUFS		10	/* entries in gk20a_cde_ctx.mem[] */
#define MAX_CDE_PARAMS		64	/* entries in gk20a_cde_ctx.params[] */
#define MAX_CDE_USER_PARAMS	32	/* entries in gk20a_cde_ctx.user_param_values[] */
#define MAX_CDE_OBJ_IDS		4	/* entries in gk20a_cde_ctx.obj_ids[] */

/*
 * Forward declarations for types this header only refers to through
 * pointers. Declaring the tags at file scope keeps the struct types named in
 * the prototypes below from getting prototype scope; repeating a declaration
 * that an included header already provides is harmless.
 */
struct channel_gk20a;
struct dma_buf;
struct gk20a;
struct gk20a_fence;
struct nvhost_fence;
struct nvhost_gpfifo;
struct platform_device;
struct sg_table;
struct vm_gk20a;
31
32/*
33 * this element defines a buffer that is allocated and mapped into gpu address
34 * space. data_byte_offset defines the beginning of the buffer inside the
35 * firmare. num_bytes defines how many bytes the firmware contains.
36 *
37 * If data_byte_offset is zero, we allocate an empty buffer.
38 */
39
40struct gk20a_cde_hdr_buf {
41 u64 data_byte_offset;
42 u64 num_bytes;
43};
44
45/*
46 * this element defines a constant patching in buffers. It basically
47 * computes physical address to <source_buf>+source_byte_offset. The
48 * address is then modified into patch value as per:
49 * value = (current_value & ~mask) | (address << shift) & mask .
50 *
51 * The type field defines the register size as:
52 * 0=u32,
53 * 1=u64 (little endian),
54 * 2=u64 (big endian)
55 */
56
57struct gk20a_cde_hdr_replace {
58 u32 target_buf;
59 u32 source_buf;
60 s32 shift;
61 u32 type;
62 s64 target_byte_offset;
63 s64 source_byte_offset;
64 u64 mask;
65};
66
/*
 * Register-size selectors for the type field of struct gk20a_cde_hdr_replace
 * and struct gk20a_cde_hdr_param.
 */
enum {
	TYPE_PARAM_TYPE_U32		= 0,	/* u32 */
	TYPE_PARAM_TYPE_U64_LITTLE	= 1,	/* u64, little endian */
	TYPE_PARAM_TYPE_U64_BIG		= 2	/* u64, big endian */
};
72
73/*
74 * this element defines a runtime patching in buffers. Parameters with id from
75 * 0 to 1024 are reserved for special usage as follows:
76 * 0 = comptags_per_cacheline,
77 * 1 = slices_per_fbp,
78 * 2 = num_fbps
79 * 3 = source buffer first page offset
80 * 4 = source buffer block height log2
81 * 5 = backing store memory address
82 * 6 = destination memory address
83 * 7 = destination size (bytes)
84 * 8 = backing store size (bytes)
85 * 9 = cache line size
86 *
87 * Parameters above id 1024 are user-specified. I.e. they determine where a
88 * parameters from user space should be placed in buffers, what is their
89 * type, etc.
90 *
91 * Once the value is available, we add data_offset to the value.
92 *
93 * The value address is then modified into patch value as per:
94 * value = (current_value & ~mask) | (address << shift) & mask .
95 *
96 * The type field defines the register size as:
97 * 0=u32,
98 * 1=u64 (little endian),
99 * 2=u64 (big endian)
100 */
101
102struct gk20a_cde_hdr_param {
103 u32 id;
104 u32 target_buf;
105 s32 shift;
106 u32 type;
107 s64 data_offset;
108 s64 target_byte_offset;
109 u64 mask;
110};
111
/*
 * Reserved parameter ids for the id field of struct gk20a_cde_hdr_param.
 * NUM_RESERVED_PARAMS is the size of the reserved id range, not an id with
 * its own meaning.
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE	= 0,
	TYPE_PARAM_GPU_CONFIGURATION		= 1,
	TYPE_PARAM_FIRSTPAGEOFFSET		= 2,
	TYPE_PARAM_NUMPAGES			= 3,
	TYPE_PARAM_BACKINGSTORE			= 4,
	TYPE_PARAM_DESTINATION			= 5,
	TYPE_PARAM_DESTINATION_SIZE		= 6,
	TYPE_PARAM_BACKINGSTORE_SIZE		= 7,
	TYPE_PARAM_SOURCE_SMMU_ADDR		= 8,
	NUM_RESERVED_PARAMS			= 1024,
};
124
125/*
126 * This header element defines a command. The op field determines whether the
127 * element is defining an init (0) or convert command (1). data_byte_offset
128 * denotes the beginning address of command elements in the file.
129 */
130
131struct gk20a_cde_hdr_command {
132 u32 op;
133 u32 num_entries;
134 u64 data_byte_offset;
135};
136
/* Values for the op field of struct gk20a_cde_hdr_command. */
enum {
	TYPE_BUF_COMMAND_INIT		= 0,	/* init command */
	TYPE_BUF_COMMAND_CONVERT	= 1	/* convert command */
};
141
142/*
143 * This is a command element defines one entry inside push buffer. target_buf
144 * defines the buffer including the pushbuffer entries, target_byte_offset the
145 * offset inside the buffer and num_bytes the number of words in the buffer.
146 */
147
148struct gk20a_cde_cmd_elem {
149 u32 target_buf;
150 u32 padding;
151 u64 target_byte_offset;
152 u64 num_bytes;
153};
154
155/*
156 * Following defines a single header element. Each element has a type and
157 * some of the data structures.
158 */
159
160struct gk20a_cde_hdr_elem {
161 u32 type;
162 u32 padding;
163 union {
164 struct gk20a_cde_hdr_buf buf;
165 struct gk20a_cde_hdr_replace replace;
166 struct gk20a_cde_hdr_param param;
167 u32 required_class;
168 struct gk20a_cde_hdr_command command;
169 };
170};
171
/* Header element kinds; see the type field of struct gk20a_cde_hdr_elem. */
enum {
	TYPE_BUF		= 0,
	TYPE_REPLACE		= 1,
	TYPE_PARAM		= 2,
	TYPE_REQUIRED_CLASS	= 3,
	TYPE_COMMAND		= 4
};
179
180struct gk20a_cde_mem_desc {
181 struct sg_table *sgt;
182 dma_addr_t iova;
183 void *cpuva;
184 size_t num_bytes;
185 u64 gpu_va;
186};
187
188struct gk20a_cde_param {
189 u32 id;
190 u32 padding;
191 u64 value;
192};
193
194struct gk20a_cde_ctx {
195 struct gk20a *g;
196 struct platform_device *pdev;
197
198 /* channel related data */
199 struct channel_gk20a *ch;
200 struct vm_gk20a *vm;
201
202 /* buf converter configuration */
203 struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS];
204 int num_bufs;
205
206 /* buffer patching params (where should patching be done) */
207 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
208 int num_params;
209
210 /* storage for user space parameter values */
211 u32 user_param_values[MAX_CDE_USER_PARAMS];
212
213 u64 src_smmu_addr;
214 u32 src_param_offset;
215 u32 src_param_lines;
216
217 u64 src_vaddr;
218
219 u64 dest_vaddr;
220 u64 dest_size;
221
222 u32 obj_ids[MAX_CDE_OBJ_IDS];
223 int num_obj_ids;
224
225 u64 backing_store_vaddr;
226
227 struct nvhost_gpfifo *init_cmd;
228 int init_cmd_num_entries;
229
230 struct nvhost_gpfifo *convert_cmd;
231 int convert_cmd_num_entries;
232
233 struct kobj_attribute attr;
234};
235
236struct gk20a_cde_app {
237 bool initialised;
238 struct mutex mutex;
239 struct vm_gk20a *vm;
240
241 struct gk20a_cde_ctx cde_ctx[1];
242 int cde_ctx_ptr;
243};
244
245int gk20a_cde_destroy(struct gk20a *g);
246int gk20a_init_cde_support(struct gk20a *g);
247int gk20a_cde_reload(struct gk20a *g);
248int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd,
249 s32 dst_kind, u64 dst_word_offset,
250 u32 dst_size, struct nvhost_fence *fence,
251 u32 __flags, struct gk20a_cde_param *params,
252 int num_params, struct gk20a_fence **fence_out);
253
254#endif