/* * GK20A color decompression engine support * * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef _CDE_GK20A_H_ #define _CDE_GK20A_H_ #include #include #include #include #include #define MAX_CDE_BUFS 10 #define MAX_CDE_PARAMS 64 #define MAX_CDE_USER_PARAMS 40 #define MAX_CDE_ARRAY_ENTRIES 9 /* * The size of the context ring buffer that is dedicated for handling cde * jobs. Re-using a context (=channel) for a differnt cde job forces a cpu * wait on the previous job to that channel, so increasing this value * reduces the likelihood of stalls. */ #define NUM_CDE_CONTEXTS 4 struct dma_buf; struct device; struct nvgpu_os_linux; struct gk20a; struct gk20a_fence; struct nvgpu_channel_fence; struct channel_gk20a; struct vm_gk20a; struct nvgpu_gpfifo_entry; /* * this element defines a buffer that is allocated and mapped into gpu address * space. data_byte_offset defines the beginning of the buffer inside the * firmare. num_bytes defines how many bytes the firmware contains. * * If data_byte_offset is zero, we allocate an empty buffer. */ struct gk20a_cde_hdr_buf { u64 data_byte_offset; u64 num_bytes; }; /* * this element defines a constant patching in buffers. It basically * computes physical address to +source_byte_offset. The * address is then modified into patch value as per: * value = (current_value & ~mask) | (address << shift) & mask . * * The type field defines the register size as: * 0=u32, * 1=u64 (little endian), * 2=u64 (big endian) */ struct gk20a_cde_hdr_replace { u32 target_buf; u32 source_buf; s32 shift; u32 type; u64 target_byte_offset; u64 source_byte_offset; u64 mask; }; enum { TYPE_PARAM_TYPE_U32 = 0, TYPE_PARAM_TYPE_U64_LITTLE, TYPE_PARAM_TYPE_U64_BIG }; /* * this element defines a runtime patching in buffers. Parameters with id from * 0 to 1024 are reserved for special usage as follows: * 0 = comptags_per_cacheline, * 1 = slices_per_fbp, * 2 = num_fbps * 3 = source buffer first page offset * 4 = source buffer block height log2 * 5 = backing store memory address * 6 = destination memory address * 7 = destination size (bytes) * 8 = backing store size (bytes) * 9 = cache line size * * Parameters above id 1024 are user-specified. I.e. they determine where a * parameters from user space should be placed in buffers, what is their * type, etc. * * Once the value is available, we add data_offset to the value. * * The value address is then modified into patch value as per: * value = (current_value & ~mask) | (address << shift) & mask . * * The type field defines the register size as: * 0=u32, * 1=u64 (little endian), * 2=u64 (big endian) */ struct gk20a_cde_hdr_param { u32 id; u32 target_buf; s32 shift; u32 type; s64 data_offset; u64 target_byte_offset; u64 mask; }; enum { TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, TYPE_PARAM_GPU_CONFIGURATION, TYPE_PARAM_FIRSTPAGEOFFSET, TYPE_PARAM_NUMPAGES, TYPE_PARAM_BACKINGSTORE, TYPE_PARAM_DESTINATION, TYPE_PARAM_DESTINATION_SIZE, TYPE_PARAM_BACKINGSTORE_SIZE, TYPE_PARAM_SOURCE_SMMU_ADDR, TYPE_PARAM_BACKINGSTORE_BASE_HW, TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, TYPE_PARAM_SCATTERBUFFER, TYPE_PARAM_SCATTERBUFFER_SIZE, NUM_RESERVED_PARAMS = 1024, }; /* * This header element defines a command. The op field determines whether the * element is defining an init (0) or convert command (1). data_byte_offset * denotes the beginning address of command elements in the file. */ struct gk20a_cde_hdr_command { u32 op; u32 num_entries; u64 data_byte_offset; }; enum { TYPE_BUF_COMMAND_INIT = 0, TYPE_BUF_COMMAND_CONVERT, TYPE_BUF_COMMAND_NOOP }; /* * This is a command element defines one entry inside push buffer. target_buf * defines the buffer including the pushbuffer entries, target_byte_offset the * offset inside the buffer and num_bytes the number of words in the buffer. */ struct gk20a_cde_cmd_elem { u32 target_buf; u32 padding; u64 target_byte_offset; u64 num_bytes; }; /* * This element is used for storing a small array of data. */ enum { ARRAY_PROGRAM_OFFSET = 0, ARRAY_REGISTER_COUNT, ARRAY_LAUNCH_COMMAND, NUM_CDE_ARRAYS }; struct gk20a_cde_hdr_array { u32 id; u32 data[MAX_CDE_ARRAY_ENTRIES]; }; /* * Following defines a single header element. Each element has a type and * some of the data structures. */ struct gk20a_cde_hdr_elem { u32 type; u32 padding; union { struct gk20a_cde_hdr_buf buf; struct gk20a_cde_hdr_replace replace; struct gk20a_cde_hdr_param param; u32 required_class; struct gk20a_cde_hdr_command command; struct gk20a_cde_hdr_array array; }; }; enum { TYPE_BUF = 0, TYPE_REPLACE, TYPE_PARAM, TYPE_REQUIRED_CLASS, TYPE_COMMAND, TYPE_ARRAY }; struct gk20a_cde_param { u32 id; u32 padding; u64 value; }; struct gk20a_cde_ctx { struct nvgpu_os_linux *l; struct device *dev; /* channel related data */ struct channel_gk20a *ch; struct tsg_gk20a *tsg; struct vm_gk20a *vm; /* buf converter configuration */ struct nvgpu_mem mem[MAX_CDE_BUFS]; unsigned int num_bufs; /* buffer patching params (where should patching be done) */ struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; unsigned int num_params; /* storage for user space parameter values */ u32 user_param_values[MAX_CDE_USER_PARAMS]; u32 surf_param_offset; u32 surf_param_lines; u64 surf_vaddr; u64 compbit_vaddr; u64 compbit_size; u64 scatterbuffer_vaddr; u64 scatterbuffer_size; u64 backing_store_vaddr; struct nvgpu_gpfifo_entry *init_convert_cmd; int init_cmd_num_entries; struct nvgpu_gpfifo_entry *convert_cmd; int convert_cmd_num_entries; struct kobj_attribute attr; bool init_cmd_executed; struct nvgpu_list_node list; bool is_temporary; bool in_use; struct delayed_work ctx_deleter_work; }; static inline struct gk20a_cde_ctx * gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) { return (struct gk20a_cde_ctx *) ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); }; struct gk20a_cde_app { bool initialised; struct nvgpu_mutex mutex; struct nvgpu_list_node free_contexts; struct nvgpu_list_node used_contexts; unsigned int ctx_count; unsigned int ctx_usecount; unsigned int ctx_count_top; u32 firmware_version; u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; u32 shader_parameter; }; void gk20a_cde_destroy(struct nvgpu_os_linux *l); void gk20a_cde_suspend(struct nvgpu_os_linux *l); int gk20a_init_cde_support(struct nvgpu_os_linux *l); int gk20a_cde_reload(struct nvgpu_os_linux *l); int gk20a_cde_convert(struct nvgpu_os_linux *l, struct dma_buf *compbits_buf, u64 compbits_byte_offset, u64 scatterbuffer_byte_offset, struct nvgpu_channel_fence *fence, u32 __flags, struct gk20a_cde_param *params, int num_params, struct gk20a_fence **fence_out); int gk20a_prepare_compressible_read( struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, u64 compbits_hoffset, u64 compbits_voffset, u64 scatterbuffer_offset, u32 width, u32 height, u32 block_height_log2, u32 submit_flags, struct nvgpu_channel_fence *fence, u32 *valid_compbits, u32 *zbc_color, struct gk20a_fence **fence_out); int gk20a_mark_compressible_write( struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, u32 zbc_color); int nvgpu_cde_init_ops(struct nvgpu_os_linux *l); #endif