diff options
author | Arto Merilainen <amerilainen@nvidia.com> | 2014-07-21 03:21:09 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:41 -0400 |
commit | b3e023a8055d4346b30924a03a99286926e76a15 (patch) | |
tree | 8e798c4d6ad8335616df558ec820fb71608c2980 /drivers/gpu/nvgpu/gk20a/cde_gk20a.h | |
parent | c60a300c4ac903dd7e0b53f2542a081fa4c334cb (diff) |
gpu: nvgpu: CDE support
This patch adds support for executing a precompiled GPU program to
allow exporting GPU buffers to other graphics units that have color
decompression engine (CDE) support.
Bug 1409151
Change-Id: Id0c930923f2449b85a6555de71d7ec93eed238ae
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/360418
Reviewed-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h new file mode 100644 index 00000000..784ae8b4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h | |||
@@ -0,0 +1,254 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include "mm_gk20a.h" | ||
23 | |||
24 | #define MAX_CDE_BUFS 10 /* capacity of gk20a_cde_ctx.mem[] */ | |
25 | #define MAX_CDE_PARAMS 64 /* capacity of gk20a_cde_ctx.params[] */ | |
26 | #define MAX_CDE_USER_PARAMS 32 /* capacity of gk20a_cde_ctx.user_param_values[] */ | |
27 | #define MAX_CDE_OBJ_IDS 4 /* capacity of gk20a_cde_ctx.obj_ids[] */ | |
28 | |||
29 | struct dma_buf; /* forward declaration only; not referenced elsewhere in this header */ | |
30 | struct gk20a; /* forward declaration; used by pointer in gk20a_cde_ctx and the prototypes below */ | |
31 | |||
32 | /* | ||
33 | * this element defines a buffer that is allocated and mapped into gpu address | ||
34 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
35 | * firmware. num_bytes defines how many bytes the firmware contains. | |
36 | * | ||
37 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
38 | */ | ||
39 | |||
40 | struct gk20a_cde_hdr_buf { /* header element: one buffer to allocate and map into GPU address space */ | |
41 | u64 data_byte_offset; /* start of the buffer contents inside the firmware; 0 means allocate an empty buffer */ | |
42 | u64 num_bytes; /* number of bytes of buffer contents */ | |
43 | }; | |
44 | |||
45 | /* | ||
46 | * This element defines a constant patch applied to a buffer. It computes | |
47 | * the physical address of <source_buf>+source_byte_offset. The | |
48 | * address is then turned into the patch value as per: | |
49 | * value = (current_value & ~mask) | ((address << shift) & mask). | |
50 | * | ||
51 | * The type field defines the register size as: | ||
52 | * 0=u32, | ||
53 | * 1=u64 (little endian), | ||
54 | * 2=u64 (big endian) | ||
55 | */ | ||
56 | |||
57 | struct gk20a_cde_hdr_replace { /* header element: patch a buffer with the address of another buffer */ | |
58 | u32 target_buf; /* buffer that receives the patch (presumably an index into the context's buffers -- confirm) */ | |
59 | u32 source_buf; /* buffer whose address is patched in */ | |
60 | s32 shift; /* shift applied to the address before masking; signed, NOTE(review): meaning of negative values not shown here */ | |
61 | u32 type; /* register size: one of TYPE_PARAM_TYPE_* */ | |
62 | s64 target_byte_offset; /* byte offset of the patched location; presumably relative to target_buf -- confirm */ | |
63 | s64 source_byte_offset; /* byte offset added to the start of source_buf */ | |
64 | u64 mask; /* bits of the current value that the patch replaces */ | |
65 | }; | |
66 | |||
67 | enum { /* register sizes accepted by the type field of the replace and param elements */ | |
68 | TYPE_PARAM_TYPE_U32 = 0, /* 32-bit value */ | |
69 | TYPE_PARAM_TYPE_U64_LITTLE, /* = 1: 64-bit value, little endian */ | |
70 | TYPE_PARAM_TYPE_U64_BIG /* = 2: 64-bit value, big endian */ | |
71 | }; | |
72 | |||
73 | /* | ||
74 | * this element defines a runtime patching in buffers. Parameters with id from | ||
75 | * 0 to 1024 are reserved for special usage as follows: | ||
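76 | * 0 = comptags per cacheline (TYPE_PARAM_COMPTAGS_PER_CACHELINE), | |
77 | * 1 = GPU configuration (TYPE_PARAM_GPU_CONFIGURATION), | |
78 | * 2 = source buffer first page offset (TYPE_PARAM_FIRSTPAGEOFFSET), | |
79 | * 3 = number of pages (TYPE_PARAM_NUMPAGES), | |
80 | * 4 = backing store memory address (TYPE_PARAM_BACKINGSTORE), | |
81 | * 5 = destination memory address (TYPE_PARAM_DESTINATION), | |
82 | * 6 = destination size (bytes) (TYPE_PARAM_DESTINATION_SIZE), | |
83 | * 7 = backing store size (bytes) (TYPE_PARAM_BACKINGSTORE_SIZE), | |
84 | * 8 = source SMMU address (TYPE_PARAM_SOURCE_SMMU_ADDR), | |
85 | * 9 and up = reserved, not assigned in the TYPE_PARAM_* enum | |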
86 | * | ||
87 | * Parameters above id 1024 are user-specified, i.e. they determine where | |
88 | * parameters from user space should be placed in buffers, what their | |
89 | * type is, etc. | |
90 | * | ||
91 | * Once the value is available, we add data_offset to the value. | ||
92 | * | ||
93 | * The value is then turned into the patch value as per: | |
94 | * value = (current_value & ~mask) | ((address << shift) & mask). | |
95 | * | ||
96 | * The type field defines the register size as: | ||
97 | * 0=u32, | ||
98 | * 1=u64 (little endian), | ||
99 | * 2=u64 (big endian) | ||
100 | */ | ||
101 | |||
102 | struct gk20a_cde_hdr_param { /* header element: patch a buffer with a value that is only known at runtime */ | |
103 | u32 id; /* parameter id: reserved TYPE_PARAM_* ids or a user-specified id */ | |
104 | u32 target_buf; /* buffer that receives the patch (presumably an index into the context's buffers -- confirm) */ | |
105 | s32 shift; /* shift applied to the value before masking; signed, NOTE(review): meaning of negative values not shown here */ | |
106 | u32 type; /* register size: one of TYPE_PARAM_TYPE_* */ | |
107 | s64 data_offset; /* added to the parameter value once it is available */ | |
108 | s64 target_byte_offset; /* byte offset of the patched location; presumably relative to target_buf -- confirm */ | |
109 | u64 mask; /* bits of the current value that the patch replaces */ | |
110 | }; | |
111 | |||
112 | enum { /* reserved parameter ids for gk20a_cde_hdr_param.id */ | |
113 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | |
114 | TYPE_PARAM_GPU_CONFIGURATION, /* = 1 */ | |
115 | TYPE_PARAM_FIRSTPAGEOFFSET, /* = 2 */ | |
116 | TYPE_PARAM_NUMPAGES, /* = 3 */ | |
117 | TYPE_PARAM_BACKINGSTORE, /* = 4 */ | |
118 | TYPE_PARAM_DESTINATION, /* = 5 */ | |
119 | TYPE_PARAM_DESTINATION_SIZE, /* = 6 */ | |
120 | TYPE_PARAM_BACKINGSTORE_SIZE, /* = 7 */ | |
121 | TYPE_PARAM_SOURCE_SMMU_ADDR, /* = 8 */ | |
122 | NUM_RESERVED_PARAMS = 1024, /* size of the reserved id range; ids 9..1023 have no name here */ | |
123 | }; | |
124 | |||
125 | /* | ||
126 | * This header element defines a command. The op field determines whether the | ||
127 | * element is defining an init (0) or convert command (1). data_byte_offset | ||
128 | * denotes the beginning address of command elements in the file. | ||
129 | */ | ||
130 | |||
131 | struct gk20a_cde_hdr_command { /* header element: describes one command (init or convert) */ | |
132 | u32 op; /* TYPE_BUF_COMMAND_INIT (0) or TYPE_BUF_COMMAND_CONVERT (1) */ | |
133 | u32 num_entries; /* presumably the number of gk20a_cde_cmd_elem entries at data_byte_offset -- confirm */ | |
134 | u64 data_byte_offset; /* where the command elements begin in the file */ | |
135 | }; | |
136 | |||
137 | enum { /* values for gk20a_cde_hdr_command.op */ | |
138 | TYPE_BUF_COMMAND_INIT = 0, /* element defines the init command */ | |
139 | TYPE_BUF_COMMAND_CONVERT /* = 1: element defines the convert command */ | |
140 | }; | |
141 | |||
142 | /* | ||
143 | * This command element defines one entry inside a push buffer. target_buf | |
144 | * defines the buffer including the pushbuffer entries, target_byte_offset the | |
145 | * offset inside the buffer and num_bytes the number of words in the buffer. | |
146 | */ | ||
147 | |||
148 | struct gk20a_cde_cmd_elem { /* one push buffer entry of a command */ | |
149 | u32 target_buf; /* buffer that contains the pushbuffer entries */ | |
150 | u32 padding; /* explicit pad; target_buf + padding fill the 8 bytes ahead of the u64 fields */ | |
151 | u64 target_byte_offset; /* offset inside target_buf */ | |
152 | u64 num_bytes; /* NOTE(review): named as a byte count, but the comment above this struct says "words" -- confirm the unit */ | |
153 | }; | |
154 | |||
155 | /* | ||
156 | * Following defines a single header element. Each element has a type and | ||
157 | * some of the data structures. | ||
158 | */ | ||
159 | |||
160 | struct gk20a_cde_hdr_elem { /* a single typed header element */ | |
161 | u32 type; /* presumably one of TYPE_BUF..TYPE_COMMAND, selecting the union member below -- confirm */ | |
162 | u32 padding; /* explicit pad; type + padding fill the 8 bytes ahead of the union */ | |
163 | union { /* anonymous union: members are accessed directly on the element */ | |
164 | struct gk20a_cde_hdr_buf buf; | |
165 | struct gk20a_cde_hdr_replace replace; | |
166 | struct gk20a_cde_hdr_param param; | |
167 | u32 required_class; | |
168 | struct gk20a_cde_hdr_command command; | |
169 | }; | |
170 | }; | |
171 | |||
172 | enum { /* header element kinds; names parallel the union members of gk20a_cde_hdr_elem */ | |
173 | TYPE_BUF = 0, | |
174 | TYPE_REPLACE, /* = 1 */ | |
175 | TYPE_PARAM, /* = 2 */ | |
176 | TYPE_REQUIRED_CLASS, /* = 3 */ | |
177 | TYPE_COMMAND /* = 4 */ | |
178 | }; | |
179 | |||
180 | struct gk20a_cde_mem_desc { /* bookkeeping for one buffer; element type of gk20a_cde_ctx.mem[] */ | |
181 | struct sg_table *sgt; /* NOTE(review): field roles below are inferred from names only -- confirm against the .c file */ | |
182 | dma_addr_t iova; /* presumably the DMA address of the allocation */ | |
183 | void *cpuva; /* presumably the kernel CPU mapping of the allocation */ | |
184 | size_t num_bytes; /* presumably the allocation size in bytes */ | |
185 | u64 gpu_va; /* presumably the GPU virtual address the buffer is mapped at */ | |
186 | }; | |
187 | |||
188 | struct gk20a_cde_param { /* id/value pair; gk20a_cde_convert() takes an array of these */ | |
189 | u32 id; /* presumably matched against gk20a_cde_hdr_param.id -- confirm */ | |
190 | u32 padding; /* explicit pad; id + padding fill the 8 bytes ahead of value */ | |
191 | u64 value; | |
192 | }; | |
193 | |||
194 | struct gk20a_cde_ctx { /* state of one CDE context: channel, buffers, patch parameters and command lists */ | |
195 | struct gk20a *g; | |
196 | struct platform_device *pdev; | |
197 | |||
198 | /* channel related data */ | |
199 | struct channel_gk20a *ch; | |
200 | struct vm_gk20a *vm; | |
201 | |||
202 | /* buf converter configuration */ | |
203 | struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS]; | |
204 | int num_bufs; /* presumably the number of mem[] entries in use -- confirm */ | |
205 | |||
206 | /* buffer patching params (where should patching be done) */ | |
207 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | |
208 | int num_params; /* presumably the number of params[] entries in use -- confirm */ | |
209 | |||
210 | /* storage for user space parameter values */ | |
211 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | |
212 | |||
213 | u64 src_smmu_addr; /* NOTE(review): the src_ and dest_ fields look like per-conversion inputs -- confirm where they are set */ | |
214 | u32 src_param_offset; | |
215 | u32 src_param_lines; | |
216 | |||
217 | u64 src_vaddr; | |
218 | |||
219 | u64 dest_vaddr; | |
220 | u64 dest_size; | |
221 | |||
222 | u32 obj_ids[MAX_CDE_OBJ_IDS]; | |
223 | int num_obj_ids; /* presumably the number of obj_ids[] entries in use -- confirm */ | |
224 | |||
225 | u64 backing_store_vaddr; | |
226 | |||
227 | struct nvhost_gpfifo *init_cmd; /* presumably the entries built for TYPE_BUF_COMMAND_INIT -- confirm */ | |
228 | int init_cmd_num_entries; /* entry count for init_cmd */ | |
229 | |||
230 | struct nvhost_gpfifo *convert_cmd; /* presumably the entries built for TYPE_BUF_COMMAND_CONVERT -- confirm */ | |
231 | int convert_cmd_num_entries; /* entry count for convert_cmd */ | |
232 | |||
233 | struct kobj_attribute attr; /* embedded by value, so the full kobj_attribute definition must be visible to includers */ | |
234 | }; | |
235 | |||
236 | struct gk20a_cde_app { /* top-level CDE state; holds the single gk20a_cde_ctx */ | |
237 | bool initialised; | |
238 | struct mutex mutex; /* NOTE(review): what this lock protects is not visible in this header -- confirm */ | |
239 | struct vm_gk20a *vm; | |
240 | |||
241 | struct gk20a_cde_ctx cde_ctx[1]; /* fixed one-element array, i.e. exactly one context */ | |
242 | int cde_ctx_ptr; /* presumably an index into cde_ctx[] -- confirm */ | |
243 | }; | |
244 | |||
245 | int gk20a_cde_destroy(struct gk20a *g); /* NOTE(review): implementations are not in this header; return conventions are not visible here */ | |
246 | int gk20a_init_cde_support(struct gk20a *g); | |
247 | int gk20a_cde_reload(struct gk20a *g); | |
248 | int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, /* src_fd, dst_fd: presumably dma-buf file descriptors -- confirm */ | |
249 | s32 dst_kind, u64 dst_word_offset, | |
250 | u32 dst_size, struct nvhost_fence *fence, | |
251 | u32 __flags, struct gk20a_cde_param *params, /* params: presumably an array of num_params entries -- confirm */ | |
252 | int num_params, struct gk20a_fence **fence_out); /* fence_out: presumably an output parameter -- confirm */ | |
253 | |||
254 | #endif | ||