1 files changed, 326 insertions, 0 deletions
diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h
new file mode 100644
index 0000000..5928b62
--- /dev/null
+++ b/include/os/linux/cde.h
@@ -0,0 +1,326 @@
+/*
+ * GK20A color decompression engine support
+ *
+ * Copyright (c) 2014-2017, NVIDIA Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _CDE_GK20A_H_
+#define _CDE_GK20A_H_
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/list.h>
+#include <nvgpu/lock.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#define MAX_CDE_BUFS            10 /* capacity of gk20a_cde_ctx.mem[] */
+#define MAX_CDE_PARAMS          64 /* capacity of gk20a_cde_ctx.params[] */
+#define MAX_CDE_USER_PARAMS     40 /* capacity of gk20a_cde_ctx.user_param_values[] */
+#define MAX_CDE_ARRAY_ENTRIES   9 /* length of gk20a_cde_hdr_array.data[] and of each gk20a_cde_app.arrays[] row */
+/*
+ * The size of the context ring buffer that is dedicated for handling cde
+ * jobs.  Re-using a context (=channel) for a different cde job forces a cpu
+ * wait on the previous job to that channel, so increasing this value
+ * reduces the likelihood of stalls.
+ */
+#define NUM_CDE_CONTEXTS        4
+struct dma_buf; /* forward declarations: used only via pointers in this header */
+struct device;
+struct nvgpu_os_linux;
+struct gk20a;
+struct gk20a_fence;
+struct nvgpu_channel_fence;
+struct channel_gk20a; /* NOTE(review): struct tsg_gk20a is used by gk20a_cde_ctx but is not forward-declared here */
+struct vm_gk20a;
+struct nvgpu_gpfifo_entry;
+/*
+ * this element defines a buffer that is allocated and mapped into gpu address
+ * space. data_byte_offset defines the beginning of the buffer inside the
+ * firmware. num_bytes defines how many bytes the firmware contains.
+ *
+ * If data_byte_offset is zero, we allocate an empty buffer.
+ */
+struct gk20a_cde_hdr_buf {
+        u64 data_byte_offset; /* start of the buffer data in the firmware; 0 = empty buffer */
+        u64 num_bytes; /* byte count for this buffer */
+};
+/*
+ * this element defines a constant patching in buffers. It basically
+ * computes physical address to <source_buf>+source_byte_offset. The
+ * address is then modified into patch value as per:
+ *    value = (current_value & ~mask) | ((address << shift) & mask) .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ *
+ * NOTE(review): shift is signed; how a negative shift is applied is not
+ * visible in this header - confirm against the patching code.
+ */
+struct gk20a_cde_hdr_replace {
+        u32 target_buf; /* presumably the buffer that gets patched - confirm */
+        u32 source_buf; /* buffer whose address is patched in */
+        s32 shift;
+        u32 type; /* register size, see TYPE_PARAM_TYPE_* */
+        u64 target_byte_offset; /* presumably the patch location inside target_buf - confirm */
+        u64 source_byte_offset; /* added to the address of source_buf */
+        u64 mask;
+};
+enum { /* register sizes for the 'type' field of the replace/param header elements */
+        TYPE_PARAM_TYPE_U32 = 0, /* 0: u32 */
+        TYPE_PARAM_TYPE_U64_LITTLE, /* 1: u64, little endian */
+        TYPE_PARAM_TYPE_U64_BIG /* 2: u64, big endian */
+};
+/*
+ * this element defines a runtime patching in buffers. Parameters with id from
+ * 0 to 1024 are reserved for special usage. The reserved ids defined in this
+ * header are the TYPE_PARAM_* enumerators that follow this struct:
+ *    0 = TYPE_PARAM_COMPTAGS_PER_CACHELINE
+ *    1 = TYPE_PARAM_GPU_CONFIGURATION
+ *    2 = TYPE_PARAM_FIRSTPAGEOFFSET
+ *    3 = TYPE_PARAM_NUMPAGES
+ *    4 = TYPE_PARAM_BACKINGSTORE
+ *    5 = TYPE_PARAM_DESTINATION
+ *    6 = TYPE_PARAM_DESTINATION_SIZE
+ *    7 = TYPE_PARAM_BACKINGSTORE_SIZE
+ *    8 = TYPE_PARAM_SOURCE_SMMU_ADDR
+ *    9 = TYPE_PARAM_BACKINGSTORE_BASE_HW
+ *   10 = TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE
+ *   11 = TYPE_PARAM_SCATTERBUFFER
+ *   12 = TYPE_PARAM_SCATTERBUFFER_SIZE
+ *
+ * NOTE(review): this list previously read 0 = comptags_per_cacheline,
+ * 1 = slices_per_fbp, 2 = num_fbps, 3 = source buffer first page offset,
+ * 4 = source buffer block height log2, 5 = backing store memory address,
+ * 6 = destination memory address, 7 = destination size (bytes),
+ * 8 = backing store size (bytes), 9 = cache line size. That numbering does
+ * not match the enumerators; the enum is taken as authoritative here -
+ * confirm against the firmware format.
+ *
+ * Parameters above id 1024 are user-specified. I.e. they determine where
+ * parameters from user space should be placed in buffers, what is their
+ * type, etc.
+ *
+ * Once the value is available, we add data_offset to the value.
+ *
+ * The value address is then modified into patch value as per:
+ *    value = (current_value & ~mask) | ((address << shift) & mask) .
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+struct gk20a_cde_hdr_param {
+        u32 id; /* reserved TYPE_PARAM_* id or a user-specified id */
+        u32 target_buf; /* presumably the buffer that gets patched - confirm */
+        s32 shift;
+        u32 type; /* register size, see TYPE_PARAM_TYPE_* */
+        s64 data_offset; /* signed offset added to the parameter value */
+        u64 target_byte_offset; /* presumably the patch location inside target_buf - confirm */
+        u64 mask;
+};
+enum { /* reserved parameter ids (gk20a_cde_hdr_param.id) */
+        TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
+        TYPE_PARAM_GPU_CONFIGURATION, /* 1 */
+        TYPE_PARAM_FIRSTPAGEOFFSET, /* 2 */
+        TYPE_PARAM_NUMPAGES, /* 3 */
+        TYPE_PARAM_BACKINGSTORE, /* 4 */
+        TYPE_PARAM_DESTINATION, /* 5 */
+        TYPE_PARAM_DESTINATION_SIZE, /* 6 */
+        TYPE_PARAM_BACKINGSTORE_SIZE, /* 7 */
+        TYPE_PARAM_SOURCE_SMMU_ADDR, /* 8 */
+        TYPE_PARAM_BACKINGSTORE_BASE_HW, /* 9 */
+        TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, /* 10 */
+        TYPE_PARAM_SCATTERBUFFER, /* 11 */
+        TYPE_PARAM_SCATTERBUFFER_SIZE, /* 12 */
+        NUM_RESERVED_PARAMS = 1024, /* extent of the reserved id range */
+};
+/*
+ * This header element defines a command. The op field determines whether the
+ * element is defining an init (0) or convert command (1). data_byte_offset
+ * denotes the beginning address of command elements in the file.
+ */
+struct gk20a_cde_hdr_command {
+        u32 op; /* TYPE_BUF_COMMAND_* */
+        u32 num_entries; /* presumably the number of gk20a_cde_cmd_elem entries - confirm */
+        u64 data_byte_offset; /* where the command elements begin in the file */
+};
+enum { /* values of gk20a_cde_hdr_command.op */
+        TYPE_BUF_COMMAND_INIT = 0, /* 0: init command */
+        TYPE_BUF_COMMAND_CONVERT, /* 1: convert command */
+        TYPE_BUF_COMMAND_NOOP /* 2: NOTE(review): not described by the hdr_command comment */
+};
+/*
+ * This command element defines one entry inside the push buffer. target_buf
+ * defines the buffer including the pushbuffer entries, target_byte_offset the
+ * offset inside the buffer and num_bytes the number of words in the buffer.
+ *
+ * NOTE(review): the field is named num_bytes but is described as a word
+ * count - confirm the unit against the code that consumes it.
+ */
+struct gk20a_cde_cmd_elem {
+        u32 target_buf; /* buffer holding the pushbuffer entries */
+        u32 padding;
+        u64 target_byte_offset; /* offset inside target_buf */
+        u64 num_bytes;
+};
+/*
+ * This element is used for storing a small array of data.
+ *
+ * Each array holds MAX_CDE_ARRAY_ENTRIES 32-bit values. NUM_CDE_ARRAYS is the
+ * number of ARRAY_* ids and sizes the first dimension of
+ * gk20a_cde_app.arrays[][].
+ */
+enum {
+        ARRAY_PROGRAM_OFFSET = 0,
+        ARRAY_REGISTER_COUNT, /* 1 */
+        ARRAY_LAUNCH_COMMAND, /* 2 */
+        NUM_CDE_ARRAYS /* 3: count of the ids above */
+};
+struct gk20a_cde_hdr_array {
+        u32 id; /* presumably one of ARRAY_* - confirm */
+        u32 data[MAX_CDE_ARRAY_ENTRIES];
+};
+/*
+ * Following defines a single header element. Each element has a type and
+ * some of the data structures.
+ *
+ * The payload is an anonymous union, so all variants share the same storage
+ * and the element is as large as its largest variant. Presumably 'type'
+ * (one of the TYPE_* values declared after this struct) says which variant
+ * is valid - confirm against the firmware parser.
+ */
+struct gk20a_cde_hdr_elem {
+        u32 type;
+        u32 padding;
+        union {
+                struct gk20a_cde_hdr_buf buf;
+                struct gk20a_cde_hdr_replace replace;
+                struct gk20a_cde_hdr_param param;
+                u32 required_class;
+                struct gk20a_cde_hdr_command command;
+                struct gk20a_cde_hdr_array array;
+        };
+};
+enum { /* header element types; presumably the values of gk20a_cde_hdr_elem.type - confirm */
+        TYPE_BUF = 0,
+        TYPE_REPLACE, /* 1 */
+        TYPE_PARAM, /* 2 */
+        TYPE_REQUIRED_CLASS, /* 3 */
+        TYPE_COMMAND, /* 4 */
+        TYPE_ARRAY /* 5 */
+};
+struct gk20a_cde_param { /* id/value pair; an array of these is passed to gk20a_cde_convert() */
+        u32 id;
+        u32 padding; /* keeps 'value' at an 8-byte offset */
+        u64 value;
+};
+struct gk20a_cde_ctx { /* state of one cde context (a context corresponds to a channel, see NUM_CDE_CONTEXTS) */
+        struct nvgpu_os_linux *l;
+        struct device *dev;
+        /* channel related data */
+        struct channel_gk20a *ch;
+        struct tsg_gk20a *tsg;
+        struct vm_gk20a *vm;
+        /* buf converter configuration */
+        struct nvgpu_mem mem[MAX_CDE_BUFS];
+        unsigned int num_bufs; /* presumably the number of mem[] entries in use - confirm */
+        /* buffer patching params (where should patching be done) */
+        struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
+        unsigned int num_params; /* presumably the number of params[] entries in use - confirm */
+        /* storage for user space parameter values */
+        u32 user_param_values[MAX_CDE_USER_PARAMS];
+        u32 surf_param_offset;
+        u32 surf_param_lines;
+        u64 surf_vaddr;
+        u64 compbit_vaddr;
+        u64 compbit_size;
+        u64 scatterbuffer_vaddr;
+        u64 scatterbuffer_size;
+        u64 backing_store_vaddr;
+        struct nvgpu_gpfifo_entry *init_convert_cmd;
+        int init_cmd_num_entries; /* presumably the entry count of init_convert_cmd - confirm */
+        struct nvgpu_gpfifo_entry *convert_cmd;
+        int convert_cmd_num_entries; /* presumably the entry count of convert_cmd - confirm */
+        struct kobj_attribute attr;
+        bool init_cmd_executed;
+        struct nvgpu_list_node list; /* link node; gk20a_cde_ctx_from_list() maps it back to this struct */
+        bool is_temporary;
+        bool in_use;
+        struct delayed_work ctx_deleter_work; /* presumably deferred deletion of this context - confirm */
+};
+/*
+ * Convert a pointer to the 'list' member embedded in a gk20a_cde_ctx back
+ * into a pointer to the enclosing context, by subtracting the member's
+ * offset from the node address.
+ *
+ * node must point at the 'list' field of a struct gk20a_cde_ctx; the result
+ * is not meaningful for any other pointer (including NULL).
+ */
+static inline struct gk20a_cde_ctx *
+gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
+{
+        return (struct gk20a_cde_ctx *)
+                ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
+}
+struct gk20a_cde_app { /* cde bookkeeping: context lists, counters and firmware-derived arrays */
+        bool initialised;
+        struct nvgpu_mutex mutex; /* NOTE(review): which fields this mutex guards is not visible here */
+        struct nvgpu_list_node free_contexts; /* presumably heads a list of gk20a_cde_ctx.list nodes - confirm */
+        struct nvgpu_list_node used_contexts; /* presumably heads a list of gk20a_cde_ctx.list nodes - confirm */
+        unsigned int ctx_count;
+        unsigned int ctx_usecount;
+        unsigned int ctx_count_top;
+        u32 firmware_version;
+        u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; /* one row per ARRAY_* id */
+        u32 shader_parameter;
+};
+void gk20a_cde_destroy(struct nvgpu_os_linux *l); /* cde entry points; defined outside this header */
+void gk20a_cde_suspend(struct nvgpu_os_linux *l);
+int gk20a_init_cde_support(struct nvgpu_os_linux *l);
+int gk20a_cde_reload(struct nvgpu_os_linux *l);
+int gk20a_cde_convert(struct nvgpu_os_linux *l,
+                struct dma_buf *compbits_buf,
+                u64 compbits_byte_offset,
+                u64 scatterbuffer_byte_offset,
+                struct nvgpu_channel_fence *fence,
+                u32 __flags, struct gk20a_cde_param *params, /* NOTE(review): "__flags" uses a reserved-style identifier */
+                int num_params, struct gk20a_fence **fence_out); /* num_params: presumably the length of params[] - confirm */
+int gk20a_prepare_compressible_read(
+                struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
+                u64 compbits_hoffset, u64 compbits_voffset,
+                u64 scatterbuffer_offset,
+                u32 width, u32 height, u32 block_height_log2,
+                u32 submit_flags, struct nvgpu_channel_fence *fence,
+                u32 *valid_compbits, u32 *zbc_color, /* passed by pointer; presumably outputs - confirm */
+                struct gk20a_fence **fence_out);
+int gk20a_mark_compressible_write(
+                struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
+                u32 zbc_color); /* takes struct gk20a *, unlike the nvgpu_os_linux * entry points above */
+int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
+#endif

diff --git a/include/os/linux/cde.h b/include/os/linux/cde.h new file mode 100644 index 0000000..5928b62 --- /dev/null +++ b/include/os/linux/cde.h
@@ -0,0 +1,326 @@
	1	/*
	2	* GK20A color decompression engine support
	3	*
	4	* Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
	5	*
	6	* This program is free software; you can redistribute it and/or modify it
	7	* under the terms and conditions of the GNU General Public License,
	8	* version 2, as published by the Free Software Foundation.
	9	*
	10	* This program is distributed in the hope it will be useful, but WITHOUT
	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	13	* more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	17	*/
	18
	19	#ifndef _CDE_GK20A_H_
	20	#define _CDE_GK20A_H_
	21
	22	#include <nvgpu/nvgpu_mem.h>
	23	#include <nvgpu/list.h>
	24	#include <nvgpu/lock.h>
	25
	26	#include <linux/kobject.h>
	27	#include <linux/workqueue.h>
	28
	29	#define MAX_CDE_BUFS 10
	30	#define MAX_CDE_PARAMS 64
	31	#define MAX_CDE_USER_PARAMS 40
	32	#define MAX_CDE_ARRAY_ENTRIES 9
	33
	34	/*
	35	* The size of the context ring buffer that is dedicated for handling cde
	36	* jobs. Re-using a context (=channel) for a different cde job forces a cpu
	37	* wait on the previous job to that channel, so increasing this value
	38	* reduces the likelihood of stalls.
	39	*/
	40	#define NUM_CDE_CONTEXTS 4
	41
	42	struct dma_buf;
	43	struct device;
	44	struct nvgpu_os_linux;
	45	struct gk20a;
	46	struct gk20a_fence;
	47	struct nvgpu_channel_fence;
	48	struct channel_gk20a;
	49	struct vm_gk20a;
	50	struct nvgpu_gpfifo_entry;
	51
	52	/*
	53	* this element defines a buffer that is allocated and mapped into gpu address
	54	* space. data_byte_offset defines the beginning of the buffer inside the
	55	* firmware. num_bytes defines how many bytes the firmware contains.
	56	*
	57	* If data_byte_offset is zero, we allocate an empty buffer.
	58	*/
	59
	60	struct gk20a_cde_hdr_buf {
	61	u64 data_byte_offset;
	62	u64 num_bytes;
	63	};
	64
	65	/*
	66	* this element defines a constant patching in buffers. It basically
	67	* computes physical address to <source_buf>+source_byte_offset. The
	68	* address is then modified into patch value as per:
	69	* value = (current_value & ~mask) | (address << shift) & mask .
	70	*
	71	* The type field defines the register size as:
	72	* 0=u32,
	73	* 1=u64 (little endian),
	74	* 2=u64 (big endian)
	75	*/
	76
	77	struct gk20a_cde_hdr_replace {
	78	u32 target_buf;
	79	u32 source_buf;
	80	s32 shift;
	81	u32 type;
	82	u64 target_byte_offset;
	83	u64 source_byte_offset;
	84	u64 mask;
	85	};
	86
	87	enum {
	88	TYPE_PARAM_TYPE_U32 = 0,
	89	TYPE_PARAM_TYPE_U64_LITTLE,
	90	TYPE_PARAM_TYPE_U64_BIG
	91	};
	92
	93	/*
	94	* this element defines a runtime patching in buffers. Parameters with id from
	95	* 0 to 1024 are reserved for special usage as follows:
	96	* 0 = comptags_per_cacheline,
	97	* 1 = slices_per_fbp,
	98	* 2 = num_fbps
	99	* 3 = source buffer first page offset
	100	* 4 = source buffer block height log2
	101	* 5 = backing store memory address
	102	* 6 = destination memory address
	103	* 7 = destination size (bytes)
	104	* 8 = backing store size (bytes)
	105	* 9 = cache line size
	106	*
	107	* Parameters above id 1024 are user-specified. I.e. they determine where
	108	* parameters from user space should be placed in buffers, what is their
	109	* type, etc.
	110	*
	111	* Once the value is available, we add data_offset to the value.
	112	*
	113	* The value address is then modified into patch value as per:
	114	* value = (current_value & ~mask) | (address << shift) & mask .
	115	*
	116	* The type field defines the register size as:
	117	* 0=u32,
	118	* 1=u64 (little endian),
	119	* 2=u64 (big endian)
	120	*/
	121
	122	struct gk20a_cde_hdr_param {
	123	u32 id;
	124	u32 target_buf;
	125	s32 shift;
	126	u32 type;
	127	s64 data_offset;
	128	u64 target_byte_offset;
	129	u64 mask;
	130	};
	131
	132	enum {
	133	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
	134	TYPE_PARAM_GPU_CONFIGURATION,
	135	TYPE_PARAM_FIRSTPAGEOFFSET,
	136	TYPE_PARAM_NUMPAGES,
	137	TYPE_PARAM_BACKINGSTORE,
	138	TYPE_PARAM_DESTINATION,
	139	TYPE_PARAM_DESTINATION_SIZE,
	140	TYPE_PARAM_BACKINGSTORE_SIZE,
	141	TYPE_PARAM_SOURCE_SMMU_ADDR,
	142	TYPE_PARAM_BACKINGSTORE_BASE_HW,
	143	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
	144	TYPE_PARAM_SCATTERBUFFER,
	145	TYPE_PARAM_SCATTERBUFFER_SIZE,
	146	NUM_RESERVED_PARAMS = 1024,
	147	};
	148
	149	/*
	150	* This header element defines a command. The op field determines whether the
	151	* element is defining an init (0) or convert command (1). data_byte_offset
	152	* denotes the beginning address of command elements in the file.
	153	*/
	154
	155	struct gk20a_cde_hdr_command {
	156	u32 op;
	157	u32 num_entries;
	158	u64 data_byte_offset;
	159	};
	160
	161	enum {
	162	TYPE_BUF_COMMAND_INIT = 0,
	163	TYPE_BUF_COMMAND_CONVERT,
	164	TYPE_BUF_COMMAND_NOOP
	165	};
	166
	167	/*
	168	* This command element defines one entry inside the push buffer. target_buf
	169	* defines the buffer including the pushbuffer entries, target_byte_offset the
	170	* offset inside the buffer and num_bytes the number of words in the buffer.
	171	*/
	172
	173	struct gk20a_cde_cmd_elem {
	174	u32 target_buf;
	175	u32 padding;
	176	u64 target_byte_offset;
	177	u64 num_bytes;
	178	};
	179
	180	/*
	181	* This element is used for storing a small array of data.
	182	*/
	183
	184	enum {
	185	ARRAY_PROGRAM_OFFSET = 0,
	186	ARRAY_REGISTER_COUNT,
	187	ARRAY_LAUNCH_COMMAND,
	188	NUM_CDE_ARRAYS
	189	};
	190
	191	struct gk20a_cde_hdr_array {
	192	u32 id;
	193	u32 data[MAX_CDE_ARRAY_ENTRIES];
	194	};
	195
	196	/*
	197	* Following defines a single header element. Each element has a type and
	198	* some of the data structures.
	199	*/
	200
	201	struct gk20a_cde_hdr_elem {
	202	u32 type;
	203	u32 padding;
	204	union {
	205	struct gk20a_cde_hdr_buf buf;
	206	struct gk20a_cde_hdr_replace replace;
	207	struct gk20a_cde_hdr_param param;
	208	u32 required_class;
	209	struct gk20a_cde_hdr_command command;
	210	struct gk20a_cde_hdr_array array;
	211	};
	212	};
	213
	214	enum {
	215	TYPE_BUF = 0,
	216	TYPE_REPLACE,
	217	TYPE_PARAM,
	218	TYPE_REQUIRED_CLASS,
	219	TYPE_COMMAND,
	220	TYPE_ARRAY
	221	};
	222
	223	struct gk20a_cde_param {
	224	u32 id;
	225	u32 padding;
	226	u64 value;
	227	};
	228
	229	struct gk20a_cde_ctx {
	230	struct nvgpu_os_linux *l;
	231	struct device *dev;
	232
	233	/* channel related data */
	234	struct channel_gk20a *ch;
	235	struct tsg_gk20a *tsg;
	236	struct vm_gk20a *vm;
	237
	238	/* buf converter configuration */
	239	struct nvgpu_mem mem[MAX_CDE_BUFS];
	240	unsigned int num_bufs;
	241
	242	/* buffer patching params (where should patching be done) */
	243	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	244	unsigned int num_params;
	245
	246	/* storage for user space parameter values */
	247	u32 user_param_values[MAX_CDE_USER_PARAMS];
	248
	249	u32 surf_param_offset;
	250	u32 surf_param_lines;
	251	u64 surf_vaddr;
	252
	253	u64 compbit_vaddr;
	254	u64 compbit_size;
	255
	256	u64 scatterbuffer_vaddr;
	257	u64 scatterbuffer_size;
	258
	259	u64 backing_store_vaddr;
	260
	261	struct nvgpu_gpfifo_entry *init_convert_cmd;
	262	int init_cmd_num_entries;
	263
	264	struct nvgpu_gpfifo_entry *convert_cmd;
	265	int convert_cmd_num_entries;
	266
	267	struct kobj_attribute attr;
	268
	269	bool init_cmd_executed;
	270
	271	struct nvgpu_list_node list;
	272	bool is_temporary;
	273	bool in_use;
	274	struct delayed_work ctx_deleter_work;
	275	};
	276
	277	static inline struct gk20a_cde_ctx *
	278	gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
	279	{ /* map the embedded 'list' node back to its enclosing gk20a_cde_ctx */
	280	return (struct gk20a_cde_ctx *)
	281	((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
	282	}
	283
	284	struct gk20a_cde_app {
	285	bool initialised;
	286	struct nvgpu_mutex mutex;
	287
	288	struct nvgpu_list_node free_contexts;
	289	struct nvgpu_list_node used_contexts;
	290	unsigned int ctx_count;
	291	unsigned int ctx_usecount;
	292	unsigned int ctx_count_top;
	293
	294	u32 firmware_version;
	295
	296	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
	297
	298	u32 shader_parameter;
	299	};
	300
	301	void gk20a_cde_destroy(struct nvgpu_os_linux *l);
	302	void gk20a_cde_suspend(struct nvgpu_os_linux *l);
	303	int gk20a_init_cde_support(struct nvgpu_os_linux *l);
	304	int gk20a_cde_reload(struct nvgpu_os_linux *l);
	305	int gk20a_cde_convert(struct nvgpu_os_linux *l,
	306	struct dma_buf *compbits_buf,
	307	u64 compbits_byte_offset,
	308	u64 scatterbuffer_byte_offset,
	309	struct nvgpu_channel_fence *fence,
	310	u32 __flags, struct gk20a_cde_param *params,
	311	int num_params, struct gk20a_fence **fence_out);
	312
	313	int gk20a_prepare_compressible_read(
	314	struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
	315	u64 compbits_hoffset, u64 compbits_voffset,
	316	u64 scatterbuffer_offset,
	317	u32 width, u32 height, u32 block_height_log2,
	318	u32 submit_flags, struct nvgpu_channel_fence *fence,
	319	u32 *valid_compbits, u32 *zbc_color, /* both passed by pointer */
	320	struct gk20a_fence **fence_out);
	321	int gk20a_mark_compressible_write(
	322	struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
	323	u32 zbc_color);
	324	int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
	325
	326	#endif