diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 131 |
1 files changed, 109 insertions, 22 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 84b39b2d..ddca39f3 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -406,6 +406,12 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | |||
406 | case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: | 406 | case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: |
407 | new_data = g->gr.gobs_per_comptagline_per_slice; | 407 | new_data = g->gr.gobs_per_comptagline_per_slice; |
408 | break; | 408 | break; |
409 | case TYPE_PARAM_SCATTERBUFFER: | ||
410 | new_data = cde_ctx->scatterbuffer_vaddr; | ||
411 | break; | ||
412 | case TYPE_PARAM_SCATTERBUFFER_SIZE: | ||
413 | new_data = cde_ctx->scatterbuffer_size; | ||
414 | break; | ||
409 | default: | 415 | default: |
410 | user_id = param->id - NUM_RESERVED_PARAMS; | 416 | user_id = param->id - NUM_RESERVED_PARAMS; |
411 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) | 417 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) |
@@ -899,9 +905,10 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g) | |||
899 | } | 905 | } |
900 | 906 | ||
901 | int gk20a_cde_convert(struct gk20a *g, | 907 | int gk20a_cde_convert(struct gk20a *g, |
902 | struct dma_buf *compbits_buf, | 908 | struct dma_buf *compbits_scatter_buf, |
903 | s32 compbits_kind, u64 compbits_byte_offset, | 909 | u64 compbits_byte_offset, |
904 | u32 compbits_size, struct nvgpu_fence *fence, | 910 | u64 scatterbuffer_byte_offset, |
911 | struct nvgpu_fence *fence, | ||
905 | u32 __flags, struct gk20a_cde_param *params, | 912 | u32 __flags, struct gk20a_cde_param *params, |
906 | int num_params, struct gk20a_fence **fence_out) | 913 | int num_params, struct gk20a_fence **fence_out) |
907 | __acquires(&cde_app->mutex) | 914 | __acquires(&cde_app->mutex) |
@@ -909,13 +916,26 @@ __releases(&cde_app->mutex) | |||
909 | { | 916 | { |
910 | struct gk20a_cde_ctx *cde_ctx = NULL; | 917 | struct gk20a_cde_ctx *cde_ctx = NULL; |
911 | struct gk20a_comptags comptags; | 918 | struct gk20a_comptags comptags; |
912 | u64 compbits_offset = 0; | 919 | u64 mapped_compbits_offset = 0; |
920 | u64 compbits_size = 0; | ||
921 | u64 mapped_scatterbuffer_offset = 0; | ||
922 | u64 scatterbuffer_size = 0; | ||
913 | u64 map_vaddr = 0; | 923 | u64 map_vaddr = 0; |
914 | u64 map_offset = 0; | 924 | u64 map_offset = 0; |
915 | u32 map_size = 0; | 925 | u64 map_size = 0; |
926 | u8 *surface = NULL; | ||
916 | u64 big_page_mask = 0; | 927 | u64 big_page_mask = 0; |
917 | u32 flags; | 928 | u32 flags; |
918 | int err, i; | 929 | int err, i; |
930 | const s32 compbits_kind = 0; | ||
931 | |||
932 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
933 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
934 | |||
935 | /* scatter buffer must be after compbits buffer */ | ||
936 | if (scatterbuffer_byte_offset && | ||
937 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
938 | return -EINVAL; | ||
919 | 939 | ||
920 | mutex_lock(&g->cde_app.mutex); | 940 | mutex_lock(&g->cde_app.mutex); |
921 | 941 | ||
@@ -928,7 +948,7 @@ __releases(&cde_app->mutex) | |||
928 | /* First, map the buffer to local va */ | 948 | /* First, map the buffer to local va */ |
929 | 949 | ||
930 | /* ensure that the compbits buffer has drvdata */ | 950 | /* ensure that the compbits buffer has drvdata */ |
931 | err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev); | 951 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, &g->dev->dev); |
932 | if (err) | 952 | if (err) |
933 | goto exit_unlock; | 953 | goto exit_unlock; |
934 | 954 | ||
@@ -936,32 +956,88 @@ __releases(&cde_app->mutex) | |||
936 | the region to be mapped */ | 956 | the region to be mapped */ |
937 | big_page_mask = cde_ctx->vm->big_page_size - 1; | 957 | big_page_mask = cde_ctx->vm->big_page_size - 1; |
938 | map_offset = compbits_byte_offset & ~big_page_mask; | 958 | map_offset = compbits_byte_offset & ~big_page_mask; |
959 | map_size = compbits_scatter_buf->size - map_offset; | ||
960 | |||
939 | 961 | ||
940 | /* compute compbit start offset from the beginning of the mapped | 962 | /* compute compbit start offset from the beginning of the mapped |
941 | area */ | 963 | area */ |
942 | compbits_offset = compbits_byte_offset & big_page_mask; | 964 | mapped_compbits_offset = compbits_byte_offset - map_offset; |
943 | 965 | if (scatterbuffer_byte_offset) { | |
944 | if (!compbits_size) { | 966 | compbits_size = scatterbuffer_byte_offset - |
945 | compbits_size = compbits_buf->size - compbits_byte_offset; | 967 | compbits_byte_offset; |
946 | map_size = compbits_buf->size - map_offset; | 968 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - |
969 | map_offset; | ||
970 | scatterbuffer_size = compbits_scatter_buf->size - | ||
971 | scatterbuffer_byte_offset; | ||
972 | } else { | ||
973 | compbits_size = compbits_scatter_buf->size - | ||
974 | compbits_byte_offset; | ||
947 | } | 975 | } |
948 | 976 | ||
977 | gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
978 | map_offset, map_size); | ||
979 | gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
980 | mapped_compbits_offset, compbits_size); | ||
981 | gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
982 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
983 | |||
984 | |||
949 | /* map the destination buffer */ | 985 | /* map the destination buffer */ |
950 | get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */ | 986 | get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */ |
951 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0, | 987 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, |
952 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | 988 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
953 | compbits_kind, NULL, true, | 989 | compbits_kind, NULL, true, |
954 | gk20a_mem_flag_none, | 990 | gk20a_mem_flag_none, |
955 | map_offset, map_size, | 991 | map_offset, map_size, |
956 | NULL); | 992 | NULL); |
957 | if (!map_vaddr) { | 993 | if (!map_vaddr) { |
958 | dma_buf_put(compbits_buf); | 994 | dma_buf_put(compbits_scatter_buf); |
959 | err = -EINVAL; | 995 | err = -EINVAL; |
960 | goto exit_unlock; | 996 | goto exit_unlock; |
961 | } | 997 | } |
962 | 998 | ||
999 | if (scatterbuffer_byte_offset && | ||
1000 | g->ops.cde.need_scatter_buffer && | ||
1001 | g->ops.cde.need_scatter_buffer(g)) { | ||
1002 | struct sg_table *sgt; | ||
1003 | void *scatter_buffer; | ||
1004 | |||
1005 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1006 | if (IS_ERR(surface)) { | ||
1007 | gk20a_warn(&g->dev->dev, | ||
1008 | "dma_buf_vmap failed"); | ||
1009 | err = -EINVAL; | ||
1010 | goto exit_unlock; | ||
1011 | } | ||
1012 | |||
1013 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1014 | |||
1015 | gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1016 | surface, scatter_buffer); | ||
1017 | sgt = gk20a_mm_pin(&g->dev->dev, compbits_scatter_buf); | ||
1018 | if (IS_ERR(sgt)) { | ||
1019 | gk20a_warn(&g->dev->dev, | ||
1020 | "mm_pin failed"); | ||
1021 | err = -EINVAL; | ||
1022 | goto exit_unlock; | ||
1023 | } else { | ||
1024 | err = g->ops.cde.populate_scatter_buffer(g, sgt, | ||
1025 | compbits_byte_offset, scatter_buffer, | ||
1026 | scatterbuffer_size); | ||
1027 | WARN_ON(err); | ||
1028 | |||
1029 | gk20a_mm_unpin(&g->dev->dev, compbits_scatter_buf, | ||
1030 | sgt); | ||
1031 | if (err) | ||
1032 | goto exit_unlock; | ||
1033 | } | ||
1034 | |||
1035 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1036 | surface = NULL; | ||
1037 | } | ||
1038 | |||
963 | /* store source buffer compression tags */ | 1039 | /* store source buffer compression tags */ |
964 | gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags); | 1040 | gk20a_get_comptags(&g->dev->dev, compbits_scatter_buf, &comptags); |
965 | cde_ctx->surf_param_offset = comptags.offset; | 1041 | cde_ctx->surf_param_offset = comptags.offset; |
966 | cde_ctx->surf_param_lines = comptags.lines; | 1042 | cde_ctx->surf_param_lines = comptags.lines; |
967 | 1043 | ||
@@ -971,9 +1047,12 @@ __releases(&cde_app->mutex) | |||
971 | cde_ctx->surf_vaddr = map_vaddr; | 1047 | cde_ctx->surf_vaddr = map_vaddr; |
972 | 1048 | ||
973 | /* store information about destination */ | 1049 | /* store information about destination */ |
974 | cde_ctx->compbit_vaddr = map_vaddr + compbits_offset; | 1050 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; |
975 | cde_ctx->compbit_size = compbits_size; | 1051 | cde_ctx->compbit_size = compbits_size; |
976 | 1052 | ||
1053 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1054 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1055 | |||
977 | /* remove existing argument data */ | 1056 | /* remove existing argument data */ |
978 | memset(cde_ctx->user_param_values, 0, | 1057 | memset(cde_ctx->user_param_values, 0, |
979 | sizeof(cde_ctx->user_param_values)); | 1058 | sizeof(cde_ctx->user_param_values)); |
@@ -1002,6 +1081,8 @@ __releases(&cde_app->mutex) | |||
1002 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | 1081 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); |
1003 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | 1082 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", |
1004 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | 1083 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); |
1084 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1085 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1005 | 1086 | ||
1006 | 1087 | ||
1007 | /* take always the postfence as it is needed for protecting the | 1088 | /* take always the postfence as it is needed for protecting the |
@@ -1024,6 +1105,9 @@ exit_unlock: | |||
1024 | if (map_vaddr) | 1105 | if (map_vaddr) |
1025 | gk20a_vm_unmap(cde_ctx->vm, map_vaddr); | 1106 | gk20a_vm_unmap(cde_ctx->vm, map_vaddr); |
1026 | 1107 | ||
1108 | if (surface) | ||
1109 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1110 | |||
1027 | mutex_unlock(&g->cde_app.mutex); | 1111 | mutex_unlock(&g->cde_app.mutex); |
1028 | return err; | 1112 | return err; |
1029 | } | 1113 | } |
@@ -1266,6 +1350,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1266 | struct gk20a *g, | 1350 | struct gk20a *g, |
1267 | struct dma_buf *dmabuf, u32 consumer, | 1351 | struct dma_buf *dmabuf, u32 consumer, |
1268 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | 1352 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, |
1353 | u64 scatterbuffer_offset, | ||
1269 | u32 width, u32 height, u32 block_height_log2, | 1354 | u32 width, u32 height, u32 block_height_log2, |
1270 | u32 submit_flags, struct nvgpu_fence *fence_in, | 1355 | u32 submit_flags, struct nvgpu_fence *fence_in, |
1271 | struct gk20a_buffer_state *state) | 1356 | struct gk20a_buffer_state *state) |
@@ -1310,9 +1395,9 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1310 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | 1395 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", |
1311 | xtiles, ytiles); | 1396 | xtiles, ytiles); |
1312 | 1397 | ||
1313 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx", | 1398 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", |
1314 | width, height, block_height_log2, | 1399 | width, height, block_height_log2, |
1315 | compbits_hoffset, compbits_voffset); | 1400 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); |
1316 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | 1401 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", |
1317 | width, height, xtiles, ytiles); | 1402 | width, height, xtiles, ytiles); |
1318 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | 1403 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", |
@@ -1386,9 +1471,8 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1386 | #undef WRITE_PATCH | 1471 | #undef WRITE_PATCH |
1387 | 1472 | ||
1388 | err = gk20a_cde_convert(g, dmabuf, | 1473 | err = gk20a_cde_convert(g, dmabuf, |
1389 | 0, /* dst kind */ | ||
1390 | compbits_hoffset, | 1474 | compbits_hoffset, |
1391 | 0, /* dst_size, 0 = auto */ | 1475 | scatterbuffer_offset, |
1392 | fence_in, submit_flags, | 1476 | fence_in, submit_flags, |
1393 | params, param, &new_fence); | 1477 | params, param, &new_fence); |
1394 | if (err) | 1478 | if (err) |
@@ -1406,6 +1490,7 @@ out: | |||
1406 | static int gk20a_buffer_convert_gpu_to_cde( | 1490 | static int gk20a_buffer_convert_gpu_to_cde( |
1407 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, | 1491 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, |
1408 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | 1492 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, |
1493 | u64 scatterbuffer_offset, | ||
1409 | u32 width, u32 height, u32 block_height_log2, | 1494 | u32 width, u32 height, u32 block_height_log2, |
1410 | u32 submit_flags, struct nvgpu_fence *fence_in, | 1495 | u32 submit_flags, struct nvgpu_fence *fence_in, |
1411 | struct gk20a_buffer_state *state) | 1496 | struct gk20a_buffer_state *state) |
@@ -1425,7 +1510,8 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1425 | if (g->cde_app.firmware_version == 1) { | 1510 | if (g->cde_app.firmware_version == 1) { |
1426 | err = gk20a_buffer_convert_gpu_to_cde_v1( | 1511 | err = gk20a_buffer_convert_gpu_to_cde_v1( |
1427 | g, dmabuf, consumer, offset, compbits_hoffset, | 1512 | g, dmabuf, consumer, offset, compbits_hoffset, |
1428 | compbits_voffset, width, height, block_height_log2, | 1513 | compbits_voffset, scatterbuffer_offset, |
1514 | width, height, block_height_log2, | ||
1429 | submit_flags, fence_in, state); | 1515 | submit_flags, fence_in, state); |
1430 | } else { | 1516 | } else { |
1431 | dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d", | 1517 | dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d", |
@@ -1440,6 +1526,7 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1440 | int gk20a_prepare_compressible_read( | 1526 | int gk20a_prepare_compressible_read( |
1441 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | 1527 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, |
1442 | u64 compbits_hoffset, u64 compbits_voffset, | 1528 | u64 compbits_hoffset, u64 compbits_voffset, |
1529 | u64 scatterbuffer_offset, | ||
1443 | u32 width, u32 height, u32 block_height_log2, | 1530 | u32 width, u32 height, u32 block_height_log2, |
1444 | u32 submit_flags, struct nvgpu_fence *fence, | 1531 | u32 submit_flags, struct nvgpu_fence *fence, |
1445 | u32 *valid_compbits, u32 *zbc_color, | 1532 | u32 *valid_compbits, u32 *zbc_color, |
@@ -1482,7 +1569,7 @@ int gk20a_prepare_compressible_read( | |||
1482 | g, dmabuf, | 1569 | g, dmabuf, |
1483 | missing_cde_bits, | 1570 | missing_cde_bits, |
1484 | offset, compbits_hoffset, | 1571 | offset, compbits_hoffset, |
1485 | compbits_voffset, | 1572 | compbits_voffset, scatterbuffer_offset, |
1486 | width, height, block_height_log2, | 1573 | width, height, block_height_log2, |
1487 | submit_flags, fence, | 1574 | submit_flags, fence, |
1488 | state); | 1575 | state); |