1 files changed, 148 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
new file mode 100644
index 00000000..4a16abd1
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -0,0 +1,148 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "gk20a/gk20a.h"
+#include "cde_gp10b.h"
+/*
+ * Indices of the CDE shader programs used on GP10B.
+ *
+ * Every non-passthrough program has a _4K sibling whose index differs only
+ * in bit 0; gp10b_cde_get_program_numbers() depends on this when it ORs 1
+ * into the index it has selected.
+ */
+enum gp10b_programs {
+        GP10B_PROG_HPASS = 0,
+        GP10B_PROG_HPASS_4K = GP10B_PROG_HPASS | 1,
+        GP10B_PROG_VPASS = 2,
+        GP10B_PROG_VPASS_4K = GP10B_PROG_VPASS | 1,
+        GP10B_PROG_HPASS_DEBUG = 4,
+        GP10B_PROG_HPASS_4K_DEBUG = GP10B_PROG_HPASS_DEBUG | 1,
+        GP10B_PROG_VPASS_DEBUG = 6,
+        GP10B_PROG_VPASS_4K_DEBUG = GP10B_PROG_VPASS_DEBUG | 1,
+        GP10B_PROG_PASSTHROUGH = 8,
+};
+/*
+ * Choose the horizontal- and vertical-pass CDE program indices.
+ *
+ * @g:                 GPU instance; cde_app.shader_parameter and the mm
+ *                     bypass_smmu / disable_bigpage flags are consulted.
+ * @block_height_log2: not used by the GP10B implementation.
+ * @hprog_out:         receives the horizontal-pass program index.
+ * @vprog_out:         receives the vertical-pass program index.
+ *
+ * shader_parameter == 1 selects the passthrough program for both passes,
+ * shader_parameter == 2 selects the debug programs, anything else the
+ * regular ones. With bypass_smmu set, bit 0 is ORed into both indices,
+ * which picks the matching _4K entry of enum gp10b_programs.
+ */
+static void gp10b_cde_get_program_numbers(struct gk20a *g,
+                                          u32 block_height_log2,
+                                          int *hprog_out, int *vprog_out)
+{
+        int h_idx = GP10B_PROG_PASSTHROUGH;
+        int v_idx = GP10B_PROG_PASSTHROUGH;
+        if (g->cde_app.shader_parameter != 1) {
+                const int debug = (g->cde_app.shader_parameter == 2);
+                h_idx = debug ? GP10B_PROG_HPASS_DEBUG : GP10B_PROG_HPASS;
+                v_idx = debug ? GP10B_PROG_VPASS_DEBUG : GP10B_PROG_VPASS;
+                if (g->mm.bypass_smmu) {
+                        /* only warn; the _4K programs are selected anyway */
+                        if (!g->mm.disable_bigpage)
+                                gk20a_warn(g->dev,
+                                           "when bypass_smmu is 1, disable_bigpage must be 1 too");
+                        h_idx |= 1;
+                        v_idx |= 1;
+                }
+        }
+        *hprog_out = h_idx;
+        *vprog_out = v_idx;
+}
+/*
+ * Tell the caller whether a scatter buffer has to be provided.
+ *
+ * On GP10B the answer is simply the state of g->mm.bypass_smmu.
+ */
+static bool gp10b_need_scatter_buffer(struct gk20a *g)
+{
+        return g->mm.bypass_smmu != 0;
+}
+/*
+ * Return the parity of a 32-bit word: 1 when an odd number of bits is set,
+ * 0 otherwise.
+ *
+ * The word is XOR-folded onto itself until the parity of all 32 bits has
+ * accumulated in bit 0.
+ */
+static u8 parity(u32 a)
+{
+        a ^= a >> 16;
+        a ^= a >> 8;
+        a ^= a >> 4;
+        a ^= a >> 2;
+        a ^= a >> 1;
+        return a & 1;
+}
+/*
+ * Fill the CDE scatter buffer for a surface described by a scatter-gather
+ * table.
+ *
+ * The surface is walked in 4 KiB pages. For each page one bit is produced:
+ * the parity of the page's physical address after it has been expressed in
+ * 128-byte units, masked, and shifted down to page granularity. The bits
+ * are packed LSB-first, eight pages per byte, into the scatter buffer.
+ *
+ * @g:                   GPU instance.
+ * @sgt:                 scatter-gather table backing the surface.
+ * @surface_size:        surface size in bytes; rounded up to whole pages.
+ * @scatter_buffer_ptr:  CPU-visible destination buffer.
+ * @scatter_buffer_size: capacity of the destination buffer in bytes.
+ *
+ * The walk stops once every surface page has a bit or the sg entries are
+ * exhausted, whichever happens first.
+ *
+ * Returns 0 on success, or -ENOMEM when the scatter buffer cannot hold one
+ * bit per surface page.
+ */
+static int gp10b_populate_scatter_buffer(struct gk20a *g,
+                                         struct sg_table *sgt,
+                                         size_t surface_size,
+                                         void *scatter_buffer_ptr,
+                                         size_t scatter_buffer_size)
+{
+        const u32 page_size_log2 = 12;
+        const u32 page_size = 1 << page_size_log2;
+        /* the mask is applied to PA >> 7, so 7 bits are already gone */
+        const u32 page_size_shift = page_size_log2 - 7u;
+        /*
+         * 0011 1111 1111 1111 1111 1110 0100 1000
+         * NOTE(review): presumably the GP10B slice-selection address bits;
+         * confirm against the hardware documentation.
+         */
+        const u32 slice_mask = 0x3ffffe48;
+        u8 *scatter_buffer = scatter_buffer_ptr;
+        size_t i;
+        struct scatterlist *sg = NULL;
+        u8 d = 0;
+        size_t page = 0;
+        size_t pages_left;
+        size_t bytes_needed;
+        surface_size = round_up(surface_size, page_size);
+        pages_left = surface_size >> page_size_log2;
+        /*
+         * One bit per page, rounded UP to whole bytes: the trailing partial
+         * byte is stored below as well, so it must fit in the buffer too.
+         */
+        bytes_needed = (pages_left + 7) >> 3;
+        if (bytes_needed > scatter_buffer_size)
+                return -ENOMEM;
+        for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+                unsigned int j;
+                u64 surf_pa = sg_phys(sg);
+                unsigned int n = sg->length >> page_size_log2;
+                gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %u pages", surf_pa, n);
+                for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
+                        u32 addr = (((u32)(surf_pa >> 7)) & slice_mask) >> page_size_shift;
+                        u8 scatter_bit = parity(addr);
+                        u8 bit = page & 7;
+                        d |= scatter_bit << bit;
+                        /* flush the accumulator once eight pages are packed */
+                        if (bit == 7) {
+                                scatter_buffer[page >> 3] = d;
+                                d = 0;
+                        }
+                        ++page;
+                        --pages_left;
+                }
+                if (pages_left == 0)
+                        break;
+        }
+        /* write the last byte in case the number of pages is not divisible by 8 */
+        if ((page & 7) != 0)
+                scatter_buffer[page >> 3] = d;
+#if defined(GK20A_DEBUG)
+        if (unlikely(gpu_dbg_cde & gk20a_dbg_mask)) {
+                gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
+                /* include the trailing partial byte in the dump */
+                for (i = 0; i < (page + 7) >> 3; i++) {
+                        gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
+                }
+        }
+#endif
+        return 0;
+}
+/*
+ * Install the GP10B CDE callbacks into the cde section of the given ops
+ * table.
+ */
+void gp10b_init_cde_ops(struct gpu_ops *gops)
+{
+        gops->cde.populate_scatter_buffer = gp10b_populate_scatter_buffer;
+        gops->cde.need_scatter_buffer = gp10b_need_scatter_buffer;
+        gops->cde.get_program_numbers = gp10b_cde_get_program_numbers;
+}

diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c new file mode 100644 index 00000000..4a16abd1 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -0,0 +1,148 @@
	1	/*
	2	* GP10B CDE
	3	*
	4	* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
	5	*
	6	* This program is free software; you can redistribute it and/or modify it
	7	* under the terms and conditions of the GNU General Public License,
	8	* version 2, as published by the Free Software Foundation.
	9	*
	10	* This program is distributed in the hope it will be useful, but WITHOUT
	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
	13	* more details.
	14	*/
	15
	16	#include "gk20a/gk20a.h"
	17	#include "cde_gp10b.h"
	18
	19	enum gp10b_programs {
	20	GP10B_PROG_HPASS = 0,
	21	GP10B_PROG_HPASS_4K = 1,
	22	GP10B_PROG_VPASS = 2,
	23	GP10B_PROG_VPASS_4K = 3,
	24	GP10B_PROG_HPASS_DEBUG = 4,
	25	GP10B_PROG_HPASS_4K_DEBUG = 5,
	26	GP10B_PROG_VPASS_DEBUG = 6,
	27	GP10B_PROG_VPASS_4K_DEBUG = 7,
	28	GP10B_PROG_PASSTHROUGH = 8,
	29	};
	30
	31	static void gp10b_cde_get_program_numbers(struct gk20a *g,
	32	u32 block_height_log2,
	33	int *hprog_out, int *vprog_out)
	34	{
	35	int hprog, vprog;
	36
	37	if (g->cde_app.shader_parameter == 1) {
	38	hprog = GP10B_PROG_PASSTHROUGH;
	39	vprog = GP10B_PROG_PASSTHROUGH;
	40	} else {
	41	hprog = GP10B_PROG_HPASS;
	42	vprog = GP10B_PROG_VPASS;
	43	if (g->cde_app.shader_parameter == 2) {
	44	hprog = GP10B_PROG_HPASS_DEBUG;
	45	vprog = GP10B_PROG_VPASS_DEBUG;
	46	}
	47	if (g->mm.bypass_smmu) {
	48	if (!g->mm.disable_bigpage) {
	49	gk20a_warn(g->dev,
	50	"when bypass_smmu is 1, disable_bigpage must be 1 too");
	51	}
	52	hprog |= 1;
	53	vprog |= 1;
	54	}
	55	}
	56
	57	*hprog_out = hprog;
	58	*vprog_out = vprog;
	59	}
	60
	61	static bool gp10b_need_scatter_buffer(struct gk20a *g)
	62	{
	63	return g->mm.bypass_smmu;
	64	}
	65
	66	static u8 parity(u32 a)
	67	{
	68	a ^= a>>16u;
	69	a ^= a>>8u;
	70	a ^= a>>4u;
	71	a &= 0xfu;
	72	return (0x6996u >> a) & 1u;
	73	}
	74
	75	static int gp10b_populate_scatter_buffer(struct gk20a *g,
	76	struct sg_table *sgt,
	77	size_t surface_size,
	78	void *scatter_buffer_ptr,
	79	size_t scatter_buffer_size)
	80	{
	81	/* map scatter buffer to CPU VA and fill it */
	82	const u32 page_size_log2 = 12;
	83	const u32 page_size = 1 << page_size_log2;
	84	const u32 page_size_shift = page_size_log2 - 7u;
	85
	86	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
	87	const u32 getSliceMaskGP10B = 0x3ffffe48;
	88	u8 *scatter_buffer = scatter_buffer_ptr;
	89
	90	size_t i;
	91	struct scatterlist *sg = NULL;
	92	u8 d = 0;
	93	size_t page = 0;
	94	size_t pages_left;
	95
	96	surface_size = round_up(surface_size, page_size);
	97
	98	pages_left = surface_size >> page_size_log2;
	99	if ((pages_left >> 3) > scatter_buffer_size)
	100	return -ENOMEM;
	101
	102	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
	103	unsigned int j;
	104	u64 surf_pa = sg_phys(sg);
	105	unsigned int n = (int)(sg->length >> page_size_log2);
	106
	107	gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
	108
	109	for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
	110	u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
	111	u8 scatter_bit = parity(addr);
	112	u8 bit = page & 7;
	113
	114	d |= scatter_bit << bit;
	115	if (bit == 7) {
	116	scatter_buffer[page >> 3] = d;
	117	d = 0;
	118	}
	119
	120	++page;
	121	--pages_left;
	122	}
	123
	124	if (pages_left == 0)
	125	break;
	126	}
	127
	128	/* write the last byte in case the number of pages is not divisible by 8 */
	129	if ((page & 7) != 0)
	130	scatter_buffer[page >> 3] = d;
	131
	132	#if defined(GK20A_DEBUG)
	133	if (unlikely(gpu_dbg_cde & gk20a_dbg_mask)) {
	134	gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
	135	for (i=0; i < page>>3; i++) {
	136	gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
	137	}
	138	}
	139	#endif
	140	return 0;
	141	}
	142
	143	void gp10b_init_cde_ops(struct gpu_ops *gops)
	144	{
	145	gops->cde.get_program_numbers = gp10b_cde_get_program_numbers;
	146	gops->cde.need_scatter_buffer = gp10b_need_scatter_buffer;
	147	gops->cde.populate_scatter_buffer = gp10b_populate_scatter_buffer;
	148	}