author		Jussi Rasanen <jrasanen@nvidia.com>	2015-08-05 08:59:32 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-09-28 20:41:23 -0400
commit		bef2159086a3db04a53cdb28f163c3158f0a8b57 (patch)
tree		f498b02f7952d77a19df8e24cf939da5cff30c57 /drivers/gpu/nvgpu/gk20a/cde_gk20a.c
parent		613990cb391c74436384d63d12240221565011d5 (diff)
gpu: nvgpu: Add support for CDE scatter buffers
Add support for CDE scatter buffers. When the bus addresses for
surfaces are not contiguous as seen by the GPU (e.g., when SMMU is
bypassed), CDE swizzling needs additional per-page information. This
information is populated in a scatter buffer when required.

Bug 1604102

Change-Id: I3384e2cfb5d5f628ed0f21375bdac8e36b77ae4f
Signed-off-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-on: http://git-master/r/789436
Reviewed-on: http://git-master/r/791243
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
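[Editorial note: the per-page information described above is produced by a new
chip-specific hook, g->ops.cde.need_scatter_buffer() / populate_scatter_buffer(),
invoked in the diff below. As a rough illustrative sketch only, assuming a flat
table of one 64-bit physical address per 4 KiB page (the entry layout and the
helper name here are hypothetical; the real op is chip-specific and not part of
this file), such a hook could walk the pinned sg_table like this:

#include <linux/scatterlist.h>

/* Hypothetical sketch: record one physical address per 4 KiB page so
 * a swizzle shader can follow a surface whose bus addresses are not
 * contiguous. The u64-per-page entry format is assumed, not NVIDIA's. */
static int example_populate_scatter_buffer(struct sg_table *sgt,
					   void *scatter_buffer,
					   u64 scatterbuffer_size)
{
	u64 *entry = scatter_buffer;
	u64 max_entries = scatterbuffer_size / sizeof(*entry);
	u64 n = 0;
	struct scatterlist *sg;
	int i;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		phys_addr_t pa = sg_phys(sg);
		unsigned int len = sg->length;

		/* one entry per small page within this sg chunk */
		while (len >= PAGE_SIZE) {
			if (n == max_entries)
				return -ENOMEM;
			entry[n++] = (u64)pa;
			pa += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
	}
	return 0;
}

As the diff shows, the op only runs when g->ops.cde.need_scatter_buffer(g)
reports that per-page information is actually needed.]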
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 131
1 file changed, 109 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 84b39b2d..ddca39f3 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -406,6 +406,12 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
 	case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
 		new_data = g->gr.gobs_per_comptagline_per_slice;
 		break;
+	case TYPE_PARAM_SCATTERBUFFER:
+		new_data = cde_ctx->scatterbuffer_vaddr;
+		break;
+	case TYPE_PARAM_SCATTERBUFFER_SIZE:
+		new_data = cde_ctx->scatterbuffer_size;
+		break;
 	default:
 		user_id = param->id - NUM_RESERVED_PARAMS;
 		if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
@@ -899,9 +905,10 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 }
 
 int gk20a_cde_convert(struct gk20a *g,
-		      struct dma_buf *compbits_buf,
-		      s32 compbits_kind, u64 compbits_byte_offset,
-		      u32 compbits_size, struct nvgpu_fence *fence,
+		      struct dma_buf *compbits_scatter_buf,
+		      u64 compbits_byte_offset,
+		      u64 scatterbuffer_byte_offset,
+		      struct nvgpu_fence *fence,
 		      u32 __flags, struct gk20a_cde_param *params,
 		      int num_params, struct gk20a_fence **fence_out)
 __acquires(&cde_app->mutex)
@@ -909,13 +916,26 @@ __releases(&cde_app->mutex)
 {
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct gk20a_comptags comptags;
-	u64 compbits_offset = 0;
+	u64 mapped_compbits_offset = 0;
+	u64 compbits_size = 0;
+	u64 mapped_scatterbuffer_offset = 0;
+	u64 scatterbuffer_size = 0;
 	u64 map_vaddr = 0;
 	u64 map_offset = 0;
-	u32 map_size = 0;
+	u64 map_size = 0;
+	u8 *surface = NULL;
 	u64 big_page_mask = 0;
 	u32 flags;
 	int err, i;
+	const s32 compbits_kind = 0;
+
+	gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
+		  compbits_byte_offset, scatterbuffer_byte_offset);
+
+	/* scatter buffer must be after compbits buffer */
+	if (scatterbuffer_byte_offset &&
+	    scatterbuffer_byte_offset < compbits_byte_offset)
+		return -EINVAL;
 
 	mutex_lock(&g->cde_app.mutex);
 
@@ -928,7 +948,7 @@ __releases(&cde_app->mutex)
 	/* First, map the buffer to local va */
 
 	/* ensure that the compbits buffer has drvdata */
-	err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev);
+	err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, &g->dev->dev);
 	if (err)
 		goto exit_unlock;
 
@@ -936,32 +956,88 @@ __releases(&cde_app->mutex)
 	   the region to be mapped */
 	big_page_mask = cde_ctx->vm->big_page_size - 1;
 	map_offset = compbits_byte_offset & ~big_page_mask;
+	map_size = compbits_scatter_buf->size - map_offset;
+
 
 	/* compute compbit start offset from the beginning of the mapped
 	   area */
-	compbits_offset = compbits_byte_offset & big_page_mask;
-
-	if (!compbits_size) {
-		compbits_size = compbits_buf->size - compbits_byte_offset;
-		map_size = compbits_buf->size - map_offset;
+	mapped_compbits_offset = compbits_byte_offset - map_offset;
+	if (scatterbuffer_byte_offset) {
+		compbits_size = scatterbuffer_byte_offset -
+				compbits_byte_offset;
+		mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
+					      map_offset;
+		scatterbuffer_size = compbits_scatter_buf->size -
+				     scatterbuffer_byte_offset;
+	} else {
+		compbits_size = compbits_scatter_buf->size -
+				compbits_byte_offset;
 	}
 
+	gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
+		  map_offset, map_size);
+	gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
+		  mapped_compbits_offset, compbits_size);
+	gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
+		  mapped_scatterbuffer_offset, scatterbuffer_size);
+
+
 	/* map the destination buffer */
-	get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */
-	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0,
+	get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */
+	map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				 compbits_kind, NULL, true,
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
 				 NULL);
 	if (!map_vaddr) {
-		dma_buf_put(compbits_buf);
+		dma_buf_put(compbits_scatter_buf);
 		err = -EINVAL;
 		goto exit_unlock;
 	}
 
+	if (scatterbuffer_byte_offset &&
+	    g->ops.cde.need_scatter_buffer &&
+	    g->ops.cde.need_scatter_buffer(g)) {
+		struct sg_table *sgt;
+		void *scatter_buffer;
+
+		surface = dma_buf_vmap(compbits_scatter_buf);
+		if (IS_ERR(surface)) {
+			gk20a_warn(&g->dev->dev,
+				   "dma_buf_vmap failed");
+			err = -EINVAL;
+			goto exit_unlock;
+		}
+
+		scatter_buffer = surface + scatterbuffer_byte_offset;
+
+		gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
+			  surface, scatter_buffer);
+		sgt = gk20a_mm_pin(&g->dev->dev, compbits_scatter_buf);
+		if (IS_ERR(sgt)) {
+			gk20a_warn(&g->dev->dev,
+				   "mm_pin failed");
+			err = -EINVAL;
+			goto exit_unlock;
+		} else {
+			err = g->ops.cde.populate_scatter_buffer(g, sgt,
+					compbits_byte_offset, scatter_buffer,
+					scatterbuffer_size);
+			WARN_ON(err);
+
+			gk20a_mm_unpin(&g->dev->dev, compbits_scatter_buf,
+				       sgt);
+			if (err)
+				goto exit_unlock;
+		}
+
+		dma_buf_vunmap(compbits_scatter_buf, surface);
+		surface = NULL;
+	}
+
 	/* store source buffer compression tags */
-	gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags);
+	gk20a_get_comptags(&g->dev->dev, compbits_scatter_buf, &comptags);
 	cde_ctx->surf_param_offset = comptags.offset;
 	cde_ctx->surf_param_lines = comptags.lines;
 
@@ -971,9 +1047,12 @@ __releases(&cde_app->mutex)
 	cde_ctx->surf_vaddr = map_vaddr;
 
 	/* store information about destination */
-	cde_ctx->compbit_vaddr = map_vaddr + compbits_offset;
+	cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
 	cde_ctx->compbit_size = compbits_size;
 
+	cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
+	cde_ctx->scatterbuffer_size = scatterbuffer_size;
+
 	/* remove existing argument data */
 	memset(cde_ctx->user_param_values, 0,
 	       sizeof(cde_ctx->user_param_values));
@@ -1002,6 +1081,8 @@ __releases(&cde_app->mutex)
 		g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
 	gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
 		  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
+	gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
+		  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
 
 
 	/* take always the postfence as it is needed for protecting the
@@ -1024,6 +1105,9 @@ exit_unlock:
 	if (map_vaddr)
 		gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
 
+	if (surface)
+		dma_buf_vunmap(compbits_scatter_buf, surface);
+
 	mutex_unlock(&g->cde_app.mutex);
 	return err;
 }
@@ -1266,6 +1350,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 	struct gk20a *g,
 	struct dma_buf *dmabuf, u32 consumer,
 	u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+	u64 scatterbuffer_offset,
 	u32 width, u32 height, u32 block_height_log2,
 	u32 submit_flags, struct nvgpu_fence *fence_in,
 	struct gk20a_buffer_state *state)
@@ -1310,9 +1395,9 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
 			   xtiles, ytiles);
 
-	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
+	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
 		  width, height, block_height_log2,
-		  compbits_hoffset, compbits_voffset);
+		  compbits_hoffset, compbits_voffset, scatterbuffer_offset);
 	gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
 		  width, height, xtiles, ytiles);
 	gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
@@ -1386,9 +1471,8 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 #undef WRITE_PATCH
 
 	err = gk20a_cde_convert(g, dmabuf,
-				0, /* dst kind */
 				compbits_hoffset,
-				0, /* dst_size, 0 = auto */
+				scatterbuffer_offset,
 				fence_in, submit_flags,
 				params, param, &new_fence);
 	if (err)
@@ -1406,6 +1490,7 @@ out:
 static int gk20a_buffer_convert_gpu_to_cde(
 	struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
 	u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+	u64 scatterbuffer_offset,
 	u32 width, u32 height, u32 block_height_log2,
 	u32 submit_flags, struct nvgpu_fence *fence_in,
 	struct gk20a_buffer_state *state)
@@ -1425,7 +1510,8 @@ static int gk20a_buffer_convert_gpu_to_cde(
 	if (g->cde_app.firmware_version == 1) {
 		err = gk20a_buffer_convert_gpu_to_cde_v1(
 			g, dmabuf, consumer, offset, compbits_hoffset,
-			compbits_voffset, width, height, block_height_log2,
+			compbits_voffset, scatterbuffer_offset,
+			width, height, block_height_log2,
 			submit_flags, fence_in, state);
 	} else {
 		dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d",
@@ -1440,6 +1526,7 @@ static int gk20a_buffer_convert_gpu_to_cde(
 int gk20a_prepare_compressible_read(
 	struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
 	u64 compbits_hoffset, u64 compbits_voffset,
+	u64 scatterbuffer_offset,
 	u32 width, u32 height, u32 block_height_log2,
 	u32 submit_flags, struct nvgpu_fence *fence,
 	u32 *valid_compbits, u32 *zbc_color,
@@ -1482,7 +1569,7 @@ int gk20a_prepare_compressible_read(
 			g, dmabuf,
 			missing_cde_bits,
 			offset, compbits_hoffset,
-			compbits_voffset,
+			compbits_voffset, scatterbuffer_offset,
 			width, height, block_height_log2,
 			submit_flags, fence,
 			state);
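[Editorial note: the offset arithmetic in gk20a_cde_convert() above assumes a
single dma-buf carrying the compbits region followed by the scatter buffer,
which is why scatterbuffer_byte_offset must not precede compbits_byte_offset.
A worked example with assumed numbers (a 2 MiB buffer and 128 KiB big pages;
every value here is hypothetical, chosen only to illustrate the formulas):

/*
 *   compbits_scatter_buf->size  = 0x200000   (2 MiB dma-buf, assumed)
 *   compbits_byte_offset        = 0x130000
 *   scatterbuffer_byte_offset   = 0x1c0000   (follows the compbits region)
 *   big_page_mask               = 0x01ffff   (128 KiB big pages, assumed)
 *
 *   map_offset                  = 0x130000 & ~0x1ffff = 0x120000
 *   map_size                    = 0x200000 - 0x120000 = 0x0e0000
 *   mapped_compbits_offset      = 0x130000 - 0x120000 = 0x010000
 *   compbits_size               = 0x1c0000 - 0x130000 = 0x090000
 *   mapped_scatterbuffer_offset = 0x1c0000 - 0x120000 = 0x0a0000
 *   scatterbuffer_size          = 0x200000 - 0x1c0000 = 0x040000
 */

Rounding map_offset down to a big-page boundary keeps the GPU mapping aligned,
while the mapped_* offsets locate each region inside that mapping.]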