diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-28 18:24:25 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-28 18:24:25 -0400 |
| commit | 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 (patch) | |
| tree | 4ef34501728a087be24f4ba0af90f91486bf780b /include/os/linux/cde.h | |
| parent | 306a03d18b305e4e573be3b2931978fa10679eb9 (diff) | |
Include nvgpu headers
These are needed to build on NVIDIA's Jetson boards for the time
being. Only a couple structs are required, so it should be fairly
easy to remove this dependency at some point in the future.
Diffstat (limited to 'include/os/linux/cde.h')
| -rw-r--r-- | include/os/linux/cde.h | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h new file mode 100644 index 0000000..5928b62 --- /dev/null +++ b/include/os/linux/cde.h | |||
| @@ -0,0 +1,326 @@ | |||
| 1 | /* | ||
| 2 | * GK20A color decompression engine support | ||
| 3 | * | ||
| 4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms and conditions of the GNU General Public License, | ||
| 8 | * version 2, as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef _CDE_GK20A_H_ | ||
| 20 | #define _CDE_GK20A_H_ | ||
| 21 | |||
| 22 | #include <nvgpu/nvgpu_mem.h> | ||
| 23 | #include <nvgpu/list.h> | ||
| 24 | #include <nvgpu/lock.h> | ||
| 25 | |||
| 26 | #include <linux/kobject.h> | ||
| 27 | #include <linux/workqueue.h> | ||
| 28 | |||
| 29 | #define MAX_CDE_BUFS 10 /* capacity of gk20a_cde_ctx.mem[] */ | ||
| 30 | #define MAX_CDE_PARAMS 64 /* capacity of gk20a_cde_ctx.params[] */ | ||
| 31 | #define MAX_CDE_USER_PARAMS 40 /* capacity of gk20a_cde_ctx.user_param_values[] */ | ||
| 32 | #define MAX_CDE_ARRAY_ENTRIES 9 /* u32 entries in gk20a_cde_hdr_array.data[] and in each row of gk20a_cde_app.arrays */ | ||
| 33 | |||
| 34 | /* | ||
| 35 | * The size of the context ring buffer that is dedicated for handling cde | ||
| 36 | * jobs. Re-using a context (=channel) for a different cde job forces a cpu | ||
| 37 | * wait on the previous job to that channel, so increasing this value | ||
| 38 | * reduces the likelihood of stalls. | ||
| 39 | */ | ||
| 40 | #define NUM_CDE_CONTEXTS 4 | ||
| 41 | |||
| 42 | struct dma_buf; /* forward declarations: this header refers to the types below only through pointers */ | ||
| 43 | struct device; | ||
| 44 | struct nvgpu_os_linux; | ||
| 45 | struct gk20a; | ||
| 46 | struct gk20a_fence; | ||
| 47 | struct nvgpu_channel_fence; | ||
| 48 | struct channel_gk20a; | ||
| 49 | struct vm_gk20a; | ||
| 50 | struct nvgpu_gpfifo_entry; | ||
| 51 | |||
| 52 | /* | ||
| 53 | * this element defines a buffer that is allocated and mapped into gpu address | ||
| 54 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
| 55 | * firmware. num_bytes defines how many bytes the firmware contains. | ||
| 56 | * | ||
| 57 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
| 58 | */ | ||
| 59 | |||
| 60 | struct gk20a_cde_hdr_buf { /* header element: one buffer to allocate and map into gpu address space */ | ||
| 61 | u64 data_byte_offset; /* where the buffer contents begin inside the firmware; zero means allocate an empty buffer */ | ||
| 62 | u64 num_bytes; /* how many bytes the firmware contains for this buffer */ | ||
| 63 | }; | ||
| 64 | |||
| 65 | /* | ||
| 66 | * this element defines a constant patching in buffers. It basically | ||
| 67 | * computes physical address to <source_buf>+source_byte_offset. The | ||
| 68 | * address is then modified into patch value as per: | ||
| 69 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
| 70 | * | ||
| 71 | * The type field defines the register size as: | ||
| 72 | * 0=u32, | ||
| 73 | * 1=u64 (little endian), | ||
| 74 | * 2=u64 (big endian) | ||
| 75 | */ | ||
| 76 | |||
| 77 | struct gk20a_cde_hdr_replace { /* header element: constant patch of a buffer address into another buffer */ | ||
| 78 | u32 target_buf; /* presumably the index of the buffer that receives the patch - confirm against the loader */ | ||
| 79 | u32 source_buf; /* buffer whose address, plus source_byte_offset, is the value patched in */ | ||
| 80 | s32 shift; /* shift applied to the address before masking; signed - NOTE(review): meaning of negative values not visible here */ | ||
| 81 | u32 type; /* register size: 0=u32, 1=u64 little endian, 2=u64 big endian (same values as TYPE_PARAM_TYPE_*) */ | ||
| 82 | u64 target_byte_offset; /* presumably the byte offset of the patched register inside target_buf - confirm */ | ||
| 83 | u64 source_byte_offset; /* byte offset added to the address of source_buf */ | ||
| 84 | u64 mask; /* bits of the current value that the patch replaces */ | ||
| 85 | }; | ||
| 86 | |||
| 87 | enum { /* register size codes; values match the 0/1/2 list documented for the type field */ | ||
| 88 | TYPE_PARAM_TYPE_U32 = 0, /* 32-bit register */ | ||
| 89 | TYPE_PARAM_TYPE_U64_LITTLE, /* 1: 64-bit register, little endian */ | ||
| 90 | TYPE_PARAM_TYPE_U64_BIG /* 2: 64-bit register, big endian */ | ||
| 91 | }; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * this element defines a runtime patching in buffers. Parameters with id from | ||
| 95 | * 0 to 1024 are reserved for special usage as follows: | ||
| 96 | * 0 = comptags_per_cacheline, | ||
| 97 | * 1 = slices_per_fbp, | ||
| 98 | * 2 = num_fbps | ||
| 99 | * 3 = source buffer first page offset | ||
| 100 | * 4 = source buffer block height log2 | ||
| 101 | * 5 = backing store memory address | ||
| 102 | * 6 = destination memory address | ||
| 103 | * 7 = destination size (bytes) | ||
| 104 | * 8 = backing store size (bytes) | ||
| 105 | * 9 = cache line size | ||
| 106 | * | ||
| 107 | * Parameters above id 1024 are user-specified. I.e. they determine where | ||
| 108 | * parameters from user space should be placed in buffers, what their | ||
| 109 | * type is, etc. | ||
| 110 | * | ||
| 111 | * Once the value is available, we add data_offset to the value. | ||
| 112 | * | ||
| 113 | * The value address is then modified into patch value as per: | ||
| 114 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
| 115 | * | ||
| 116 | * The type field defines the register size as: | ||
| 117 | * 0=u32, | ||
| 118 | * 1=u64 (little endian), | ||
| 119 | * 2=u64 (big endian) | ||
| 120 | */ | ||
| 121 | |||
| 122 | struct gk20a_cde_hdr_param { /* header element: runtime patch of a parameter value into a buffer */ | ||
| 123 | u32 id; /* parameter id; low ids are reserved, higher ids are user-specified (see NUM_RESERVED_PARAMS) */ | ||
| 124 | u32 target_buf; /* presumably the index of the buffer that receives the patch - confirm against the loader */ | ||
| 125 | s32 shift; /* shift applied to the value before masking */ | ||
| 126 | u32 type; /* register size: 0=u32, 1=u64 little endian, 2=u64 big endian (same values as TYPE_PARAM_TYPE_*) */ | ||
| 127 | s64 data_offset; /* signed offset added to the parameter value once that value is available */ | ||
| 128 | u64 target_byte_offset; /* presumably the byte offset of the patched register inside target_buf - confirm */ | ||
| 129 | u64 mask; /* bits of the current value that the patch replaces */ | ||
| 130 | }; | ||
| 131 | |||
| 132 | enum { /* presumably the reserved parameter ids; enumerators without an initializer count up from 0. NOTE(review): these values do not line up with the numbered id list in the comment preceding struct gk20a_cde_hdr_param (e.g. 1 is TYPE_PARAM_GPU_CONFIGURATION here but slices_per_fbp there) - confirm which one is current */ | ||
| 133 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | ||
| 134 | TYPE_PARAM_GPU_CONFIGURATION, /* 1 */ | ||
| 135 | TYPE_PARAM_FIRSTPAGEOFFSET, /* 2 */ | ||
| 136 | TYPE_PARAM_NUMPAGES, /* 3 */ | ||
| 137 | TYPE_PARAM_BACKINGSTORE, /* 4 */ | ||
| 138 | TYPE_PARAM_DESTINATION, /* 5 */ | ||
| 139 | TYPE_PARAM_DESTINATION_SIZE, /* 6 */ | ||
| 140 | TYPE_PARAM_BACKINGSTORE_SIZE, /* 7 */ | ||
| 141 | TYPE_PARAM_SOURCE_SMMU_ADDR, /* 8 */ | ||
| 142 | TYPE_PARAM_BACKINGSTORE_BASE_HW, /* 9 */ | ||
| 143 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, /* 10 */ | ||
| 144 | TYPE_PARAM_SCATTERBUFFER, /* 11 */ | ||
| 145 | TYPE_PARAM_SCATTERBUFFER_SIZE, /* 12 */ | ||
| 146 | NUM_RESERVED_PARAMS = 1024, /* explicit value, not a running count of the enumerators above */ | ||
| 147 | }; | ||
| 148 | |||
| 149 | /* | ||
| 150 | * This header element defines a command. The op field determines whether the | ||
| 151 | * element is defining an init (0) or convert command (1). data_byte_offset | ||
| 152 | * denotes the beginning address of command elements in the file. | ||
| 153 | */ | ||
| 154 | |||
| 155 | struct gk20a_cde_hdr_command { /* header element: one command definition */ | ||
| 156 | u32 op; /* 0 = init command, 1 = convert command (values match TYPE_BUF_COMMAND_*) */ | ||
| 157 | u32 num_entries; /* presumably the number of gk20a_cde_cmd_elem entries at data_byte_offset - confirm */ | ||
| 158 | u64 data_byte_offset; /* beginning address of the command elements in the file */ | ||
| 159 | }; | ||
| 160 | |||
| 161 | enum { /* command op codes */ | ||
| 162 | TYPE_BUF_COMMAND_INIT = 0, /* init command */ | ||
| 163 | TYPE_BUF_COMMAND_CONVERT, /* 1: convert command */ | ||
| 164 | TYPE_BUF_COMMAND_NOOP /* 2: NOTE(review): not mentioned in the op description of gk20a_cde_hdr_command - confirm usage */ | ||
| 165 | }; | ||
| 166 | |||
| 167 | /* | ||
| 168 | * This command element defines one entry inside the push buffer. target_buf | ||
| 169 | * defines the buffer including the pushbuffer entries, target_byte_offset the | ||
| 170 | * offset inside the buffer and num_bytes the number of words in the buffer. | ||
| 171 | */ | ||
| 172 | |||
| 173 | struct gk20a_cde_cmd_elem { /* one push buffer entry of a command */ | ||
| 174 | u32 target_buf; /* buffer that includes the pushbuffer entries */ | ||
| 175 | u32 padding; /* explicit pad after target_buf so the u64 fields start at byte offset 8 */ | ||
| 176 | u64 target_byte_offset; /* offset inside target_buf */ | ||
| 177 | u64 num_bytes; /* NOTE(review): named as a byte count, but the comment preceding this struct calls it a number of words - confirm the unit */ | ||
| 178 | }; | ||
| 179 | |||
| 180 | /* | ||
| 181 | * This element is used for storing a small array of data. | ||
| 182 | */ | ||
| 183 | |||
| 184 | enum { /* ids of the small data arrays */ | ||
| 185 | ARRAY_PROGRAM_OFFSET = 0, | ||
| 186 | ARRAY_REGISTER_COUNT, /* 1 */ | ||
| 187 | ARRAY_LAUNCH_COMMAND, /* 2 */ | ||
| 188 | NUM_CDE_ARRAYS /* 3: number of ids above; sizes the first dimension of gk20a_cde_app.arrays */ | ||
| 189 | }; | ||
| 190 | |||
| 191 | struct gk20a_cde_hdr_array { /* header element: a small array of u32 data */ | ||
| 192 | u32 id; /* presumably one of the ARRAY_* ids - confirm against the loader */ | ||
| 193 | u32 data[MAX_CDE_ARRAY_ENTRIES]; /* fixed-size payload */ | ||
| 194 | }; | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Following defines a single header element. Each element has a type and | ||
| 198 | * some of the data structures. | ||
| 199 | */ | ||
| 200 | |||
| 201 | struct gk20a_cde_hdr_elem { /* a single header element: a type tag followed by one of several payloads */ | ||
| 202 | u32 type; /* presumably one of TYPE_BUF .. TYPE_ARRAY, selecting the union member in use - confirm */ | ||
| 203 | u32 padding; /* explicit pad after type so the union starts at byte offset 8 */ | ||
| 204 | union { /* anonymous union: members are accessed directly on the element */ | ||
| 205 | struct gk20a_cde_hdr_buf buf; | ||
| 206 | struct gk20a_cde_hdr_replace replace; | ||
| 207 | struct gk20a_cde_hdr_param param; | ||
| 208 | u32 required_class; | ||
| 209 | struct gk20a_cde_hdr_command command; | ||
| 210 | struct gk20a_cde_hdr_array array; | ||
| 211 | }; | ||
| 212 | }; | ||
| 213 | |||
| 214 | enum { /* header element types; names parallel the union members of gk20a_cde_hdr_elem */ | ||
| 215 | TYPE_BUF = 0, | ||
| 216 | TYPE_REPLACE, /* 1 */ | ||
| 217 | TYPE_PARAM, /* 2 */ | ||
| 218 | TYPE_REQUIRED_CLASS, /* 3 */ | ||
| 219 | TYPE_COMMAND, /* 4 */ | ||
| 220 | TYPE_ARRAY /* 5 */ | ||
| 221 | }; | ||
| 222 | |||
| 223 | struct gk20a_cde_param { /* id/value pair; gk20a_cde_convert() takes an array of these as params */ | ||
| 224 | u32 id; /* parameter id */ | ||
| 225 | u32 padding; /* explicit pad after id so value starts at byte offset 8 */ | ||
| 226 | u64 value; /* parameter value */ | ||
| 227 | }; | ||
| 228 | |||
| 229 | struct gk20a_cde_ctx { /* state of one cde context (=channel) */ | ||
| 230 | struct nvgpu_os_linux *l; | ||
| 231 | struct device *dev; | ||
| 232 | |||
| 233 | /* channel related data */ | ||
| 234 | struct channel_gk20a *ch; | ||
| 235 | struct tsg_gk20a *tsg; /* struct tsg_gk20a has no forward declaration earlier in this header; this member declaration introduces it */ | ||
| 236 | struct vm_gk20a *vm; | ||
| 237 | |||
| 238 | /* buf converter configuration */ | ||
| 239 | struct nvgpu_mem mem[MAX_CDE_BUFS]; | ||
| 240 | unsigned int num_bufs; /* presumably the number of mem[] entries in use - confirm */ | ||
| 241 | |||
| 242 | /* buffer patching params (where should patching be done) */ | ||
| 243 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | ||
| 244 | unsigned int num_params; /* presumably the number of params[] entries in use - confirm */ | ||
| 245 | |||
| 246 | /* storage for user space parameter values */ | ||
| 247 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | ||
| 248 | |||
| 249 | u32 surf_param_offset; | ||
| 250 | u32 surf_param_lines; | ||
| 251 | u64 surf_vaddr; | ||
| 252 | |||
| 253 | u64 compbit_vaddr; | ||
| 254 | u64 compbit_size; | ||
| 255 | |||
| 256 | u64 scatterbuffer_vaddr; | ||
| 257 | u64 scatterbuffer_size; | ||
| 258 | |||
| 259 | u64 backing_store_vaddr; | ||
| 260 | |||
| 261 | struct nvgpu_gpfifo_entry *init_convert_cmd; /* NOTE(review): paired with init_cmd_num_entries below although the names differ - confirm */ | ||
| 262 | int init_cmd_num_entries; | ||
| 263 | |||
| 264 | struct nvgpu_gpfifo_entry *convert_cmd; | ||
| 265 | int convert_cmd_num_entries; | ||
| 266 | |||
| 267 | struct kobj_attribute attr; | ||
| 268 | |||
| 269 | bool init_cmd_executed; | ||
| 270 | |||
| 271 | struct nvgpu_list_node list; /* list linkage; gk20a_cde_ctx_from_list() maps a pointer to this member back to the context */ | ||
| 272 | bool is_temporary; | ||
| 273 | bool in_use; | ||
| 274 | struct delayed_work ctx_deleter_work; | ||
| 275 | }; | ||
| 276 | |||
| 277 | /* Map a list node back to the gk20a_cde_ctx that embeds it as its list member; node must point at such a member. */ static inline struct gk20a_cde_ctx * | ||
| 278 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
| 279 | { | ||
| 280 | return (struct gk20a_cde_ctx *) /* step back from the member address to the start of the containing struct */ | ||
| 281 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
| 282 | } | ||
| 283 | |||
| 284 | struct gk20a_cde_app { /* cde state shared across contexts */ | ||
| 285 | bool initialised; | ||
| 286 | struct nvgpu_mutex mutex; /* presumably guards the context lists and counters below - confirm against cde.c */ | ||
| 287 | |||
| 288 | struct nvgpu_list_node free_contexts; /* presumably list head of idle gk20a_cde_ctx entries, linked through their list member - confirm */ | ||
| 289 | struct nvgpu_list_node used_contexts; /* presumably list head of gk20a_cde_ctx entries currently in use - confirm */ | ||
| 290 | unsigned int ctx_count; | ||
| 291 | unsigned int ctx_usecount; | ||
| 292 | unsigned int ctx_count_top; | ||
| 293 | |||
| 294 | u32 firmware_version; | ||
| 295 | |||
| 296 | u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; /* one row of MAX_CDE_ARRAY_ENTRIES values per ARRAY_* id */ | ||
| 297 | |||
| 298 | u32 shader_parameter; | ||
| 299 | }; | ||
| 300 | |||
| 301 | void gk20a_cde_destroy(struct nvgpu_os_linux *l); /* cde entry points follow; their definitions are not part of this header */ | ||
| 302 | void gk20a_cde_suspend(struct nvgpu_os_linux *l); | ||
| 303 | int gk20a_init_cde_support(struct nvgpu_os_linux *l); | ||
| 304 | int gk20a_cde_reload(struct nvgpu_os_linux *l); | ||
| 305 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
| 306 | struct dma_buf *compbits_buf, | ||
| 307 | u64 compbits_byte_offset, | ||
| 308 | u64 scatterbuffer_byte_offset, | ||
| 309 | struct nvgpu_channel_fence *fence, | ||
| 310 | u32 __flags, struct gk20a_cde_param *params, /* NOTE(review): identifiers containing a double underscore are reserved by ISO C; params presumably holds num_params entries - confirm */ | ||
| 311 | int num_params, struct gk20a_fence **fence_out); | ||
| 312 | |||
| 313 | int gk20a_prepare_compressible_read( | ||
| 314 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
| 315 | u64 compbits_hoffset, u64 compbits_voffset, | ||
| 316 | u64 scatterbuffer_offset, | ||
| 317 | u32 width, u32 height, u32 block_height_log2, | ||
| 318 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
| 319 | u32 *valid_compbits, u32 *zbc_color, /* pointer parameters: presumably outputs - confirm against the definition */ | ||
| 320 | struct gk20a_fence **fence_out); | ||
| 321 | int gk20a_mark_compressible_write( | ||
| 322 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, /* takes struct gk20a, unlike the other entry points, which take struct nvgpu_os_linux */ | ||
| 323 | u32 zbc_color); | ||
| 324 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); | ||
| 325 | |||
| 326 | #endif | ||
