From 48cbfac597021ce163d4df997cdbff2f2a73eb88 Mon Sep 17 00:00:00 2001 From: Sami Kiminki Date: Tue, 18 Aug 2015 18:16:46 +0300 Subject: gpu: nvgpu: Add CDE scatter buffer code for GP10B Add GP10B-specific code for populating the scatter buffer. Essentially, this enables the use of SMMU bypass mode with 4-kB page compression. Bug 1604102 Change-Id: Ic586e2f93827b9aa1c7b73b53b8f65d518588c26 Signed-off-by: Sami Kiminki Reviewed-on: http://git-master/r/789434 Reviewed-on: http://git-master/r/806184 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gp10b/cde_gp10b.c | 84 +++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'drivers/gpu/nvgpu/gp10b/cde_gp10b.c') diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c index acb8aee3..dadec4d2 100644 --- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c @@ -58,7 +58,91 @@ static void gp10b_cde_get_program_numbers(struct gk20a *g, *vprog_out = vprog; } +static bool gp10b_need_scatter_buffer(struct gk20a *g) +{ + return g->mm.bypass_smmu; +} + +static u8 parity(u32 a) +{ + a ^= a>>16u; + a ^= a>>8u; + a ^= a>>4u; + a &= 0xfu; + return (0x6996u >> a) & 1u; +} + +static int gp10b_populate_scatter_buffer(struct gk20a *g, + struct sg_table *sgt, + size_t surface_size, + void *scatter_buffer_ptr, + size_t scatter_buffer_size) +{ + /* map scatter buffer to CPU VA and fill it */ + const u32 page_size_log2 = 12; + const u32 page_size = 1 << page_size_log2; + const u32 page_size_shift = page_size_log2 - 7u; + + /* 0011 1111 1111 1111 1111 1110 0100 1000 */ + const u32 getSliceMaskGP10B = 0x3ffffe48; + u8 *scatter_buffer = scatter_buffer_ptr; + + size_t i; + struct scatterlist *sg = NULL; + u8 d = 0; + size_t page = 0; + size_t pages_left; + + surface_size = round_up(surface_size, page_size); + + pages_left = surface_size >> page_size_log2; + if ((pages_left >> 3) > scatter_buffer_size) + return -ENOMEM; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + unsigned int j; + u64 surf_pa = sg_phys(sg); + unsigned int n = (int)(sg->length >> page_size_log2); + + gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n); + + for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) { + u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift; + u8 scatter_bit = parity(addr); + u8 bit = page & 7; + + d |= scatter_bit << bit; + if (bit == 7) { + scatter_buffer[page >> 3] = d; + d = 0; + } + + ++page; + --pages_left; + } + + if (pages_left == 0) + break; + } + + /* write the last byte in case the number of pages is not divisible by 8 */ + if ((page & 7) != 0) + scatter_buffer[page >> 3] = d; + +#if defined(GK20A_DEBUG) + if (unlikely(gpu_dbg_cde & gk20a_dbg_mask)) { + gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:"); + for (i=0; i < page>>3; i++) { + gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]); + } + } +#endif + return 0; +} + void gp10b_init_cde_ops(struct gpu_ops *gops) { gops->cde.get_program_numbers = gp10b_cde_get_program_numbers; + gops->cde.need_scatter_buffer = gp10b_need_scatter_buffer; + gops->cde.populate_scatter_buffer = gp10b_populate_scatter_buffer; } -- cgit v1.2.2