1 files changed, 160 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c
new file mode 100644
index 00000000..ffae6e34
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c
@@ -0,0 +1,160 @@
+/*
+ * GP10B CDE
+ *
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "gk20a/gk20a.h"
+#include "cde_gp10b.h"
+#include <nvgpu/log.h>
+/*
+ * Program numbers handed back by gp10b_cde_get_program_numbers().
+ *
+ * Every *_4K constant equals its base constant with bit 0 set.  The
+ * selection code depends on that: it ORs 1 into the base value instead of
+ * naming the *_4K constants, so the numbering must not be rearranged.
+ */
+enum gp10b_programs {
+        GP10B_PROG_HPASS              = 0,
+        GP10B_PROG_HPASS_4K           = 1,
+        GP10B_PROG_VPASS              = 2,
+        GP10B_PROG_VPASS_4K           = 3,
+        GP10B_PROG_HPASS_DEBUG        = 4,
+        GP10B_PROG_HPASS_4K_DEBUG     = 5,
+        GP10B_PROG_VPASS_DEBUG        = 6,
+        GP10B_PROG_VPASS_4K_DEBUG     = 7,
+        GP10B_PROG_PASSTHROUGH        = 8,
+};
+/*
+ * Pick the two program numbers (hprog / vprog) for a CDE operation.
+ *
+ * @g:                 GPU instance; g->mm.bypass_smmu and
+ *                     g->mm.disable_bigpage are consulted.
+ * @block_height_log2: not used by this implementation.
+ * @shader_parameter:  1 selects GP10B_PROG_PASSTHROUGH for both outputs,
+ *                     2 selects the *_DEBUG programs, any other value the
+ *                     regular programs.
+ * @hprog_out:         receives the selected hprog number.
+ * @vprog_out:         receives the selected vprog number.
+ */
+void gp10b_cde_get_program_numbers(struct gk20a *g,
+                                          u32 block_height_log2,
+                                          u32 shader_parameter,
+                                          int *hprog_out, int *vprog_out)
+{
+        const bool use_debug = (shader_parameter == 2);
+        int horiz = use_debug ? GP10B_PROG_HPASS_DEBUG : GP10B_PROG_HPASS;
+        int vert = use_debug ? GP10B_PROG_VPASS_DEBUG : GP10B_PROG_VPASS;
+        /* Passthrough is returned as-is; the SMMU-bypass handling is skipped. */
+        if (shader_parameter == 1) {
+                *hprog_out = GP10B_PROG_PASSTHROUGH;
+                *vprog_out = GP10B_PROG_PASSTHROUGH;
+                return;
+        }
+        if (g->mm.bypass_smmu) {
+                if (!g->mm.disable_bigpage) {
+                        nvgpu_warn(g,
+                                   "when bypass_smmu is 1, disable_bigpage must be 1 too");
+                }
+                /* Setting bit 0 turns a base program into its *_4K variant. */
+                horiz |= 1;
+                vert |= 1;
+        }
+        *hprog_out = horiz;
+        *vprog_out = vert;
+}
+/*
+ * Report whether a scatter buffer is needed: the result is the value of
+ * g->mm.bypass_smmu.
+ */
+bool gp10b_need_scatter_buffer(struct gk20a *g)
+{
+        const bool smmu_bypassed = g->mm.bypass_smmu;
+        return smmu_bypassed;
+}
+/*
+ * Return the XOR of all 32 bits of @a: 1 when an odd number of bits is set,
+ * 0 otherwise.
+ */
+static u8 parity(u32 a)
+{
+        /* Fold the word in half repeatedly; bit 0 ends up holding the parity. */
+        a ^= a >> 16;
+        a ^= a >> 8;
+        a ^= a >> 4;
+        a ^= a >> 2;
+        a ^= a >> 1;
+        return (u8)(a & 1u);
+}
+/*
+ * Fill the scatter buffer with one bit per 4 KiB page of the surface.
+ *
+ * The scatterlist entries of @sgt are walked in order.  For each page the
+ * physical address is shifted right by 7, masked with getSliceMaskGP10B,
+ * and the parity of the remaining bits becomes that page's bit.  Bits are
+ * packed LSB-first, eight pages per byte.
+ *
+ * @g:                   GPU instance (used for debug logging only).
+ * @sgt:                 scatter-gather table describing the surface memory.
+ * @surface_size:        surface size in bytes; rounded up to a whole page.
+ * @scatter_buffer_ptr:  destination, written as an array of bytes.
+ * @scatter_buffer_size: capacity of the destination in bytes.
+ *
+ * Returns 0 on success, or -ENOMEM when the destination cannot hold one bit
+ * for every page of the surface.  If @sgt describes fewer pages than the
+ * surface size implies, only the pages found are encoded and 0 is returned.
+ */
+int gp10b_populate_scatter_buffer(struct gk20a *g,
+                                         struct sg_table *sgt,
+                                         size_t surface_size,
+                                         void *scatter_buffer_ptr,
+                                         size_t scatter_buffer_size)
+{
+        const u32 page_size_log2 = 12;
+        const u32 page_size = 1 << page_size_log2;
+        const u32 page_size_shift = page_size_log2 - 7u;
+        /* 0011 1111 1111 1111 1111 1110 0100 1000 */
+        const u32 getSliceMaskGP10B = 0x3ffffe48;
+        u8 *scatter_buffer = scatter_buffer_ptr;
+        size_t i;
+        struct scatterlist *sg = NULL;
+        u8 d = 0;
+        size_t page = 0;
+        size_t pages_left;
+        size_t bytes_used;
+        surface_size = round_up(surface_size, page_size);
+        pages_left = surface_size >> page_size_log2;
+        /*
+         * One bit per page, so ceil(pages / 8) bytes are written, including
+         * the trailing partial byte.  Rounding down here would let that
+         * final byte land one past the end of the buffer.
+         */
+        if (((pages_left + 7) >> 3) > scatter_buffer_size)
+                return -ENOMEM;
+        for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+                unsigned int j;
+                u64 surf_pa = sg_phys(sg);
+                unsigned int n = sg->length >> page_size_log2;
+                gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %u pages", surf_pa, n);
+                for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
+                        u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B) >> page_size_shift;
+                        u8 scatter_bit = parity(addr);
+                        u8 bit = page & 7;
+                        d |= scatter_bit << bit;
+                        /* flush each completed byte */
+                        if (bit == 7) {
+                                scatter_buffer[page >> 3] = d;
+                                d = 0;
+                        }
+                        ++page;
+                        --pages_left;
+                }
+                if (pages_left == 0)
+                        break;
+        }
+        /* write the last byte in case the number of pages is not divisible by 8 */
+        if ((page & 7) != 0)
+                scatter_buffer[page >> 3] = d;
+        if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
+                /* dump every byte written, including a trailing partial one */
+                bytes_used = (page + 7) >> 3;
+                gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
+                for (i = 0; i < bytes_used; i++) {
+                        gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
+                }
+        }
+        return 0;
+}
+/* Operation table exposing the GP10B CDE functions defined in this file. */
+struct nvgpu_os_linux_ops gp10b_cde_ops = {
+        .cde.get_program_numbers = gp10b_cde_get_program_numbers,
+        .cde.need_scatter_buffer = gp10b_need_scatter_buffer,
+        .cde.populate_scatter_buffer = gp10b_populate_scatter_buffer,
+};

diff --git a/drivers/gpu/nvgpu/common/linux/cde_gp10b.c b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c new file mode 100644 index 00000000..ffae6e34 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/cde_gp10b.c
@@ -0,0 +1,160 @@
	1	/*
	2	* GP10B CDE
	3	*
	4	* Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
	5	*
	6	* Permission is hereby granted, free of charge, to any person obtaining a
	7	* copy of this software and associated documentation files (the "Software"),
	8	* to deal in the Software without restriction, including without limitation
	9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	10	* and/or sell copies of the Software, and to permit persons to whom the
	11	* Software is furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in
	14	* all copies or substantial portions of the Software.
	15	*
	16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	22	* DEALINGS IN THE SOFTWARE.
	23	*/
	24
	25	#include "gk20a/gk20a.h"
	26	#include "cde_gp10b.h"
	27
	28	#include <nvgpu/log.h>
	29
	30	enum gp10b_programs {
	31	GP10B_PROG_HPASS = 0,
	32	GP10B_PROG_HPASS_4K = 1,
	33	GP10B_PROG_VPASS = 2,
	34	GP10B_PROG_VPASS_4K = 3,
	35	GP10B_PROG_HPASS_DEBUG = 4,
	36	GP10B_PROG_HPASS_4K_DEBUG = 5,
	37	GP10B_PROG_VPASS_DEBUG = 6,
	38	GP10B_PROG_VPASS_4K_DEBUG = 7,
	39	GP10B_PROG_PASSTHROUGH = 8,
	40	};
	41
	42	void gp10b_cde_get_program_numbers(struct gk20a *g,
	43	u32 block_height_log2,
	44	u32 shader_parameter,
	45	int *hprog_out, int *vprog_out)
	46	{
	47	int hprog, vprog;
	48
	49	if (shader_parameter == 1) {
	50	hprog = GP10B_PROG_PASSTHROUGH;
	51	vprog = GP10B_PROG_PASSTHROUGH;
	52	} else {
	53	hprog = GP10B_PROG_HPASS;
	54	vprog = GP10B_PROG_VPASS;
	55	if (shader_parameter == 2) {
	56	hprog = GP10B_PROG_HPASS_DEBUG;
	57	vprog = GP10B_PROG_VPASS_DEBUG;
	58	}
	59	if (g->mm.bypass_smmu) {
	60	if (!g->mm.disable_bigpage) {
	61	nvgpu_warn(g,
	62	"when bypass_smmu is 1, disable_bigpage must be 1 too");
	63	}
	64	hprog |= 1;
	65	vprog |= 1;
	66	}
	67	}
	68
	69	*hprog_out = hprog;
	70	*vprog_out = vprog;
	71	}
	72
	73	bool gp10b_need_scatter_buffer(struct gk20a *g)
	74	{
	75	return g->mm.bypass_smmu;
	76	}
	77
	78	static u8 parity(u32 a)
	79	{
	80	a ^= a>>16u;
	81	a ^= a>>8u;
	82	a ^= a>>4u;
	83	a &= 0xfu;
	84	return (0x6996u >> a) & 1u;
	85	}
	86
	87	int gp10b_populate_scatter_buffer(struct gk20a *g,
	88	struct sg_table *sgt,
	89	size_t surface_size,
	90	void *scatter_buffer_ptr,
	91	size_t scatter_buffer_size)
	92	{
	93	/* map scatter buffer to CPU VA and fill it */
	94	const u32 page_size_log2 = 12;
	95	const u32 page_size = 1 << page_size_log2;
	96	const u32 page_size_shift = page_size_log2 - 7u;
	97
	98	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
	99	const u32 getSliceMaskGP10B = 0x3ffffe48;
	100	u8 *scatter_buffer = scatter_buffer_ptr;
	101
	102	size_t i;
	103	struct scatterlist *sg = NULL;
	104	u8 d = 0;
	105	size_t page = 0;
	106	size_t pages_left;
	107
	108	surface_size = round_up(surface_size, page_size);
	109
	110	pages_left = surface_size >> page_size_log2;
	111	if ((pages_left >> 3) > scatter_buffer_size)
	112	return -ENOMEM;
	113
	114	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
	115	unsigned int j;
	116	u64 surf_pa = sg_phys(sg);
	117	unsigned int n = (int)(sg->length >> page_size_log2);
	118
	119	gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
	120
	121	for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
	122	u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
	123	u8 scatter_bit = parity(addr);
	124	u8 bit = page & 7;
	125
	126	d |= scatter_bit << bit;
	127	if (bit == 7) {
	128	scatter_buffer[page >> 3] = d;
	129	d = 0;
	130	}
	131
	132	++page;
	133	--pages_left;
	134	}
	135
	136	if (pages_left == 0)
	137	break;
	138	}
	139
	140	/* write the last byte in case the number of pages is not divisible by 8 */
	141	if ((page & 7) != 0)
	142	scatter_buffer[page >> 3] = d;
	143
	144	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
	145	gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
	146	for (i = 0; i < page >> 3; i++) {
	147	gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
	148	}
	149	}
	150
	151	return 0;
	152	}
	153
	154	struct nvgpu_os_linux_ops gp10b_cde_ops = {
	155	.cde = {
	156	.get_program_numbers = gp10b_cde_get_program_numbers,
	157	.need_scatter_buffer = gp10b_need_scatter_buffer,
	158	.populate_scatter_buffer = gp10b_populate_scatter_buffer,
	159	},
	160	};