Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ad2ee159..3644c2ef 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -637,7 +637,8 @@ int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src,
 	/* map the destination buffer */
 	get_dma_buf(dst); /* a ref for gk20a_vm_map */
 	dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0,
-				 0, dst_kind, NULL, true,
+				 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				 dst_kind, NULL, true,
 				 gk20a_mem_flag_none,
 				 0, 0);
 	if (!dst_vaddr) {
@@ -654,7 +655,8 @@ int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src,
 	/* map the source buffer to prevent premature release */
 	get_dma_buf(src); /* a ref for gk20a_vm_map */
 	src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0,
-				 0, dst_kind, NULL, true,
+				 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				 dst_kind, NULL, true,
 				 gk20a_mem_flag_none,
 				 0, 0);
 	if (!src_vaddr) {
@@ -794,7 +796,8 @@ int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 
 	/* map backing store to gpu virtual space */
 	vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt,
-			       g->gr.compbit_store.size, 0,
+			       g->gr.compbit_store.size,
+			       NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 			       gk20a_mem_flag_read_only);
 
 	if (!vaddr) {
@@ -991,16 +994,14 @@ static int gk20a_buffer_convert_gpu_to_cde(
 	const int transposed_height = transpose ? width : height;
 	const int xtiles = (transposed_width + 7) >> 3;
 	const int ytiles = (transposed_height + 7) >> 3;
-	const int wgx = 16;
+	const int wgx = 8;
 	const int wgy = 8;
 	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
 	const int dst_stride = 128; /* TODO chip constant */
 	const int xalign = compbits_per_byte * wgx;
 	const int yalign = wgy;
-	const int tilepitch = roundup(xtiles, xalign) / compbits_per_byte;
-	const int ytilesaligned = roundup(ytiles, yalign);
-	const int gridw = roundup(tilepitch, wgx) / wgx;
-	const int gridh = roundup(ytilesaligned, wgy) / wgy;
+	const int gridw = roundup(xtiles, xalign) / xalign;
+	const int gridh = roundup(ytiles, yalign) / yalign;
 
 	if (!g->cde_app.initialised) {
 		err = gk20a_cde_reload(g);
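
The hunk above folds the old tilepitch/ytilesaligned intermediates into a single rounded division per axis, so gridw*wgx and gridh*wgy always cover the aligned tile counts, which is presumably why the multiple-of-workgroup check is dropped in the next hunk. A small standalone sketch of the new arithmetic; roundup_to() stands in for the kernel's roundup() macro, and the 1920x1080 surface plus the main() wrapper are illustrative only:

#include <stdio.h>

/* stand-in for the kernel's roundup(): round x up to a multiple of y */
static int roundup_to(int x, int y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	const int width = 1920, height = 1080;      /* example surface, no transpose */
	const int wgx = 8, wgy = 8;                 /* new work group size */
	const int compbits_per_byte = 4;
	const int xalign = compbits_per_byte * wgx; /* 32 */
	const int yalign = wgy;                     /* 8 */
	const int xtiles = (width + 7) >> 3;        /* 240 */
	const int ytiles = (height + 7) >> 3;       /* 135 */

	/* new computation from this patch: one rounded division per axis */
	const int gridw = roundup_to(xtiles, xalign) / xalign; /* 8 */
	const int gridh = roundup_to(ytiles, yalign) / yalign; /* 17 */

	printf("grid (%d, %d), invocations (%d, %d)\n",
	       gridw, gridh, gridw * wgx, gridh * wgy);
	return 0;
}
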
@@ -1015,17 +1016,10 @@ static int gk20a_buffer_convert_gpu_to_cde(
 	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_offset=0x%llx",
 		  width, height, block_height_log2, compbits_offset);
 	gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d) invocations (%d, %d)",
-		  width, height, xtiles, ytiles, tilepitch, ytilesaligned);
+		  width, height, xtiles, ytiles, gridw*wgx, gridh*wgy);
 	gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)",
 		  wgx, wgy, gridw, gridh);
 
-	if (tilepitch % wgx != 0 || ytilesaligned % wgy != 0) {
-		gk20a_warn(&g->dev->dev,
-			   "grid size (%d, %d) is not a multiple of work group size (%d, %d)",
-			   tilepitch, ytilesaligned, wgx, wgy);
-		return -EINVAL;
-	}
-
 	/* Write parameters */
 #define WRITE_PATCH(NAME, VALUE) \
 	params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
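
For reference, the WRITE_PATCH macro in the last hunk pastes _ID onto the parameter name and appends a gk20a_cde_param entry. A hypothetical invocation (the parameter name below is illustrative, not taken from this file) expands like this:

	WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles);
	/* expands to: */
	params[param++] = (struct gk20a_cde_param){PATCH_USER_CONST_XTILES_ID, 0, xtiles};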