Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.c')
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 131 +++++++++++++++++++++++++++--------
 1 file changed, 109 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 84b39b2d..ddca39f3 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -406,6 +406,12 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
 	case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
 		new_data = g->gr.gobs_per_comptagline_per_slice;
 		break;
+	case TYPE_PARAM_SCATTERBUFFER:
+		new_data = cde_ctx->scatterbuffer_vaddr;
+		break;
+	case TYPE_PARAM_SCATTERBUFFER_SIZE:
+		new_data = cde_ctx->scatterbuffer_size;
+		break;
 	default:
 		user_id = param->id - NUM_RESERVED_PARAMS;
 		if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
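Note: the two new reserved parameters feed the CDE firmware the scatter buffer's GPU virtual address and size through the same patching path as the existing TYPE_PARAM_* values. Condensed to the lines this patch adds (glue elided; see the hunks below for where the values are produced):

	/* set in gk20a_cde_convert() once the shared buffer is mapped */
	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
	cde_ctx->scatterbuffer_size = scatterbuffer_size;

	/* consumed here in gk20a_cde_patch_params() */
	new_data = cde_ctx->scatterbuffer_vaddr; /* TYPE_PARAM_SCATTERBUFFER */
	new_data = cde_ctx->scatterbuffer_size;  /* TYPE_PARAM_SCATTERBUFFER_SIZE */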
@@ -899,9 +905,10 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 }
 
 int gk20a_cde_convert(struct gk20a *g,
-		      struct dma_buf *compbits_buf,
-		      s32 compbits_kind, u64 compbits_byte_offset,
-		      u32 compbits_size, struct nvgpu_fence *fence,
+		      struct dma_buf *compbits_scatter_buf,
+		      u64 compbits_byte_offset,
+		      u64 scatterbuffer_byte_offset,
+		      struct nvgpu_fence *fence,
 		      u32 __flags, struct gk20a_cde_param *params,
 		      int num_params, struct gk20a_fence **fence_out)
 __acquires(&cde_app->mutex)
@@ -909,13 +916,26 @@ __releases(&cde_app->mutex)
 {
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct gk20a_comptags comptags;
-	u64 compbits_offset = 0;
+	u64 mapped_compbits_offset = 0;
+	u64 compbits_size = 0;
+	u64 mapped_scatterbuffer_offset = 0;
+	u64 scatterbuffer_size = 0;
 	u64 map_vaddr = 0;
 	u64 map_offset = 0;
-	u32 map_size = 0;
+	u64 map_size = 0;
+	u8 *surface = NULL;
 	u64 big_page_mask = 0;
 	u32 flags;
 	int err, i;
+	const s32 compbits_kind = 0;
+
+	gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
+		  compbits_byte_offset, scatterbuffer_byte_offset);
+
+	/* scatter buffer must be after compbits buffer */
+	if (scatterbuffer_byte_offset &&
+	    scatterbuffer_byte_offset < compbits_byte_offset)
+		return -EINVAL;
 
 	mutex_lock(&g->cde_app.mutex);
 
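Note: the signature change removes the caller-supplied destination kind and size; the kind is pinned to 0 and both sizes are now derived from the two byte offsets, since the compbits region and the scatter buffer share one dma-buf. The early -EINVAL check encodes the assumed layout. Restated as a self-contained helper (the function name and standalone form are illustrative; the logic is the hunk's):

#include <errno.h>
#include <stdint.h>

/*
 * Assumed layout of the shared dma-buf:
 *   [0 .. compbits_byte_offset)                    surface data
 *   [compbits_byte_offset .. scatterbuffer_off)    compbits
 *   [scatterbuffer_byte_offset .. buf size)        scatter buffer
 * A zero scatterbuffer_byte_offset means "no scatter buffer".
 */
static int check_cde_layout(uint64_t compbits_byte_offset,
			    uint64_t scatterbuffer_byte_offset)
{
	if (scatterbuffer_byte_offset &&
	    scatterbuffer_byte_offset < compbits_byte_offset)
		return -EINVAL;
	return 0;
}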
@@ -928,7 +948,7 @@ __releases(&cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev);
+	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, &g->dev->dev);
 	if (err)
 		goto exit_unlock;
 
@@ -936,32 +956,88 @@ __releases(&cde_app->mutex)
 	   the region to be mapped */
 	big_page_mask = cde_ctx->vm->big_page_size - 1;
 	map_offset = compbits_byte_offset & ~big_page_mask;
+	map_size = compbits_scatter_buf->size - map_offset;
+
 
 	/* compute compbit start offset from the beginning of the mapped
 	   area */
-	compbits_offset = compbits_byte_offset & big_page_mask;
-
-	if (!compbits_size) {
-		compbits_size = compbits_buf->size - compbits_byte_offset;
-		map_size = compbits_buf->size - map_offset;
+	mapped_compbits_offset = compbits_byte_offset - map_offset;
+	if (scatterbuffer_byte_offset) {
+		compbits_size = scatterbuffer_byte_offset -
+				compbits_byte_offset;
+		mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
+					      map_offset;
+		scatterbuffer_size = compbits_scatter_buf->size -
+				     scatterbuffer_byte_offset;
+	} else {
+		compbits_size = compbits_scatter_buf->size -
+				compbits_byte_offset;
 	}
 
+	gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
+		  map_offset, map_size);
+	gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
+		  mapped_compbits_offset, compbits_size);
+	gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
+		  mapped_scatterbuffer_offset, scatterbuffer_size);
+
+
 	/* map the destination buffer */
-	get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */
-	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0,
+	get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */
+	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				 compbits_kind, NULL, true,
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
 				 NULL);
 	if (!map_vaddr) {
-		dma_buf_put(compbits_buf);
+		dma_buf_put(compbits_scatter_buf);
 		err = -EINVAL;
 		goto exit_unlock;
 	}
 
+	if (scatterbuffer_byte_offset &&
+	    g->ops.cde.need_scatter_buffer &&
+	    g->ops.cde.need_scatter_buffer(g)) {
+		struct sg_table *sgt;
+		void *scatter_buffer;
+
+		surface = dma_buf_vmap(compbits_scatter_buf);
+		if (IS_ERR(surface)) {
+			gk20a_warn(&g->dev->dev,
+				   "dma_buf_vmap failed");
+			err = -EINVAL;
+			goto exit_unlock;
+		}
+
+		scatter_buffer = surface + scatterbuffer_byte_offset;
+
+		gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
+			  surface, scatter_buffer);
+		sgt = gk20a_mm_pin(&g->dev->dev, compbits_scatter_buf);
+		if (IS_ERR(sgt)) {
+			gk20a_warn(&g->dev->dev,
+				   "mm_pin failed");
+			err = -EINVAL;
+			goto exit_unlock;
+		} else {
+			err = g->ops.cde.populate_scatter_buffer(g, sgt,
+					compbits_byte_offset, scatter_buffer,
+					scatterbuffer_size);
+			WARN_ON(err);
+
+			gk20a_mm_unpin(&g->dev->dev, compbits_scatter_buf,
+				       sgt);
+			if (err)
+				goto exit_unlock;
+		}
+
+		dma_buf_vunmap(compbits_scatter_buf, surface);
+		surface = NULL;
+	}
+
 	/* store source buffer compression tags */
-	gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags);
+	gk20a_get_comptags(&g->dev->dev, compbits_scatter_buf, &comptags);
 	cde_ctx->surf_param_offset = comptags.offset;
 	cde_ctx->surf_param_lines = comptags.lines;
 
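Note: the mapping arithmetic above is easiest to check with concrete numbers. A minimal userspace restatement, assuming a 128 KiB big page, a 1 MiB dma-buf, compbits at 0x90400 and a scatter buffer at 0xa0000 (all values invented for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t big_page_size = 128 << 10;           /* assumed */
	uint64_t buf_size = 1 << 20;                  /* assumed */
	uint64_t compbits_byte_offset = 0x90400;      /* assumed */
	uint64_t scatterbuffer_byte_offset = 0xa0000; /* assumed */

	/* round the mapping start down to a big-page boundary */
	uint64_t big_page_mask = big_page_size - 1;
	uint64_t map_offset = compbits_byte_offset & ~big_page_mask;
	uint64_t map_size = buf_size - map_offset;

	/* offsets inside the mapping; sizes derived from the two offsets */
	uint64_t mapped_compbits_offset = compbits_byte_offset - map_offset;
	uint64_t compbits_size =
		scatterbuffer_byte_offset - compbits_byte_offset;
	uint64_t mapped_scatterbuffer_offset =
		scatterbuffer_byte_offset - map_offset;
	uint64_t scatterbuffer_size = buf_size - scatterbuffer_byte_offset;

	/* prints: map_offset=0x80000 map_size=0x80000
	 *         mapped_compbits_offset=0x10400 compbits_size=0xfc00
	 *         mapped_scatterbuffer_offset=0x20000
	 *         scatterbuffer_size=0x60000 */
	printf("map_offset=%#llx map_size=%#llx\n",
	       (unsigned long long)map_offset,
	       (unsigned long long)map_size);
	printf("mapped_compbits_offset=%#llx compbits_size=%#llx\n",
	       (unsigned long long)mapped_compbits_offset,
	       (unsigned long long)compbits_size);
	printf("mapped_scatterbuffer_offset=%#llx scatterbuffer_size=%#llx\n",
	       (unsigned long long)mapped_scatterbuffer_offset,
	       (unsigned long long)scatterbuffer_size);
	return 0;
}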
@@ -971,9 +1047,12 @@ __releases(&cde_app->mutex)
 	cde_ctx->surf_vaddr = map_vaddr;
 
 	/* store information about destination */
-	cde_ctx->compbit_vaddr = map_vaddr + compbits_offset;
+	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
 	cde_ctx->compbit_size = compbits_size;
 
+	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
+	cde_ctx->scatterbuffer_size = scatterbuffer_size;
+
 	/* remove existing argument data */
 	memset(cde_ctx->user_param_values, 0,
 	       sizeof(cde_ctx->user_param_values));
@@ -1002,6 +1081,8 @@ __releases(&cde_app->mutex)
 		  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
 	gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
 		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
+	gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
+		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
 
 
 	/* take always the postfence as it is needed for protecting the
@@ -1024,6 +1105,9 @@ exit_unlock:
 	if (map_vaddr)
 		gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
 
+	if (surface)
+		dma_buf_vunmap(compbits_scatter_buf, surface);
+
 	mutex_unlock(&g->cde_app.mutex);
 	return err;
 }
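Note: the added exit_unlock lines balance the dma_buf_vmap() in the scatter-buffer hunk: the success path vunmaps and resets surface to NULL, so the error path releases the CPU mapping only while one is still live. The shape of the pattern (a sketch, not the verbatim code):

	surface = dma_buf_vmap(compbits_scatter_buf);
	if (IS_ERR(surface))		/* checked as in this patch */
		goto exit_unlock;	/* no live mapping to undo */
	/* ... pin, populate_scatter_buffer(), unpin; failures jump to
	 * exit_unlock while the mapping is still live ... */
	dma_buf_vunmap(compbits_scatter_buf, surface);
	surface = NULL;			/* error path's vunmap is now skipped */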
@@ -1266,6 +1350,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		struct gk20a *g,
 		struct dma_buf *dmabuf, u32 consumer,
 		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_fence *fence_in,
 		struct gk20a_buffer_state *state)
@@ -1310,9 +1395,9 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
 			   xtiles, ytiles);
 
-	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
+	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
 		  width, height, block_height_log2,
-		  compbits_hoffset, compbits_voffset);
+		  compbits_hoffset, compbits_voffset, scatterbuffer_offset);
 	gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
 		  width, height, xtiles, ytiles);
 	gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
@@ -1386,9 +1471,8 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 #undef WRITE_PATCH
 
 	err = gk20a_cde_convert(g, dmabuf,
-				0, /* dst kind */
 				compbits_hoffset,
-				0, /* dst_size, 0 = auto */
+				scatterbuffer_offset,
 				fence_in, submit_flags,
 				params, param, &new_fence);
 	if (err)
@@ -1406,6 +1490,7 @@ out:
 static int gk20a_buffer_convert_gpu_to_cde(
 		struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
 		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_fence *fence_in,
 		struct gk20a_buffer_state *state)
@@ -1425,7 +1510,8 @@ static int gk20a_buffer_convert_gpu_to_cde(
 	if (g->cde_app.firmware_version == 1) {
 		err = gk20a_buffer_convert_gpu_to_cde_v1(
 			g, dmabuf, consumer, offset, compbits_hoffset,
-			compbits_voffset, width, height, block_height_log2,
+			compbits_voffset, scatterbuffer_offset,
+			width, height, block_height_log2,
 			submit_flags, fence_in, state);
 	} else {
 		dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d",
@@ -1440,6 +1526,7 @@ static int gk20a_buffer_convert_gpu_to_cde(
 int gk20a_prepare_compressible_read(
 		struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
 		u64 compbits_hoffset, u64 compbits_voffset,
+		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_fence *fence,
 		u32 *valid_compbits, u32 *zbc_color,
@@ -1482,7 +1569,7 @@ int gk20a_prepare_compressible_read(
 			g, dmabuf,
 			missing_cde_bits,
 			offset, compbits_hoffset,
-			compbits_voffset,
+			compbits_voffset, scatterbuffer_offset,
 			width, height, block_height_log2,
 			submit_flags, fence,
 			state);
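Note: the remaining hunks are plumbing: scatterbuffer_offset is threaded from gk20a_prepare_compressible_read() down to gk20a_cde_convert(), displacing the old dst-kind and dst-size arguments. Abridged call graph, limited to the parameters this patch touches (a sketch, not compilable code):

	/*
	 * gk20a_prepare_compressible_read(..., compbits_hoffset,
	 *		compbits_voffset, scatterbuffer_offset, ...)
	 *   -> gk20a_buffer_convert_gpu_to_cde(..., scatterbuffer_offset, ...)
	 *     -> gk20a_buffer_convert_gpu_to_cde_v1(..., scatterbuffer_offset, ...)
	 *       -> gk20a_cde_convert(g, dmabuf, compbits_hoffset,
	 *		scatterbuffer_offset, fence_in, submit_flags,
	 *		params, param, &new_fence);
	 */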