diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 26 |
1 files changed, 10 insertions, 16 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ad2ee159..3644c2ef 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -637,7 +637,8 @@ int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src, | |||
637 | /* map the destination buffer */ | 637 | /* map the destination buffer */ |
638 | get_dma_buf(dst); /* a ref for gk20a_vm_map */ | 638 | get_dma_buf(dst); /* a ref for gk20a_vm_map */ |
639 | dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0, | 639 | dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0, |
640 | 0, dst_kind, NULL, true, | 640 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
641 | dst_kind, NULL, true, | ||
641 | gk20a_mem_flag_none, | 642 | gk20a_mem_flag_none, |
642 | 0, 0); | 643 | 0, 0); |
643 | if (!dst_vaddr) { | 644 | if (!dst_vaddr) { |
@@ -654,7 +655,8 @@ int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src, | |||
654 | /* map the source buffer to prevent premature release */ | 655 | /* map the source buffer to prevent premature release */ |
655 | get_dma_buf(src); /* a ref for gk20a_vm_map */ | 656 | get_dma_buf(src); /* a ref for gk20a_vm_map */ |
656 | src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0, | 657 | src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0, |
657 | 0, dst_kind, NULL, true, | 658 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
659 | dst_kind, NULL, true, | ||
658 | gk20a_mem_flag_none, | 660 | gk20a_mem_flag_none, |
659 | 0, 0); | 661 | 0, 0); |
660 | if (!src_vaddr) { | 662 | if (!src_vaddr) { |
@@ -794,7 +796,8 @@ int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) | |||
794 | 796 | ||
795 | /* map backing store to gpu virtual space */ | 797 | /* map backing store to gpu virtual space */ |
796 | vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt, | 798 | vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt, |
797 | g->gr.compbit_store.size, 0, | 799 | g->gr.compbit_store.size, |
800 | NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
798 | gk20a_mem_flag_read_only); | 801 | gk20a_mem_flag_read_only); |
799 | 802 | ||
800 | if (!vaddr) { | 803 | if (!vaddr) { |
@@ -991,16 +994,14 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
991 | const int transposed_height = transpose ? width : height; | 994 | const int transposed_height = transpose ? width : height; |
992 | const int xtiles = (transposed_width + 7) >> 3; | 995 | const int xtiles = (transposed_width + 7) >> 3; |
993 | const int ytiles = (transposed_height + 7) >> 3; | 996 | const int ytiles = (transposed_height + 7) >> 3; |
994 | const int wgx = 16; | 997 | const int wgx = 8; |
995 | const int wgy = 8; | 998 | const int wgy = 8; |
996 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | 999 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ |
997 | const int dst_stride = 128; /* TODO chip constant */ | 1000 | const int dst_stride = 128; /* TODO chip constant */ |
998 | const int xalign = compbits_per_byte * wgx; | 1001 | const int xalign = compbits_per_byte * wgx; |
999 | const int yalign = wgy; | 1002 | const int yalign = wgy; |
1000 | const int tilepitch = roundup(xtiles, xalign) / compbits_per_byte; | 1003 | const int gridw = roundup(xtiles, xalign) / xalign; |
1001 | const int ytilesaligned = roundup(ytiles, yalign); | 1004 | const int gridh = roundup(ytiles, yalign) / yalign; |
1002 | const int gridw = roundup(tilepitch, wgx) / wgx; | ||
1003 | const int gridh = roundup(ytilesaligned, wgy) / wgy; | ||
1004 | 1005 | ||
1005 | if (!g->cde_app.initialised) { | 1006 | if (!g->cde_app.initialised) { |
1006 | err = gk20a_cde_reload(g); | 1007 | err = gk20a_cde_reload(g); |
@@ -1015,17 +1016,10 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1015 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_offset=0x%llx", | 1016 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_offset=0x%llx", |
1016 | width, height, block_height_log2, compbits_offset); | 1017 | width, height, block_height_log2, compbits_offset); |
1017 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d) invocations (%d, %d)", | 1018 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d) invocations (%d, %d)", |
1018 | width, height, xtiles, ytiles, tilepitch, ytilesaligned); | 1019 | width, height, xtiles, ytiles, gridw*wgx, gridh*wgy); |
1019 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)", | 1020 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)", |
1020 | wgx, wgy, gridw, gridh); | 1021 | wgx, wgy, gridw, gridh); |
1021 | 1022 | ||
1022 | if (tilepitch % wgx != 0 || ytilesaligned % wgy != 0) { | ||
1023 | gk20a_warn(&g->dev->dev, | ||
1024 | "grid size (%d, %d) is not a multiple of work group size (%d, %d)", | ||
1025 | tilepitch, ytilesaligned, wgx, wgy); | ||
1026 | return -EINVAL; | ||
1027 | } | ||
1028 | |||
1029 | /* Write parameters */ | 1023 | /* Write parameters */ |
1030 | #define WRITE_PATCH(NAME, VALUE) \ | 1024 | #define WRITE_PATCH(NAME, VALUE) \ |
1031 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | 1025 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} |