diff options
author | Jussi Rasanen <jrasanen@nvidia.com> | 2015-08-05 08:59:32 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-09-28 20:41:23 -0400 |
commit | bef2159086a3db04a53cdb28f163c3158f0a8b57 (patch) | |
tree | f498b02f7952d77a19df8e24cf939da5cff30c57 | |
parent | 613990cb391c74436384d63d12240221565011d5 (diff) |
gpu: nvgpu: Add support for CDE scatter buffers
Add support for CDE scatter buffers. When the bus addresses for
surfaces are not contiguous as seen by the GPU (e.g., when SMMU is
bypassed), CDE swizzling needs additional per-page information. This
information is populated in a scatter buffer when required.
Bug 1604102
Change-Id: I3384e2cfb5d5f628ed0f21375bdac8e36b77ae4f
Signed-off-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-on: http://git-master/r/789436
Reviewed-on: http://git-master/r/791243
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 131 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 4 |
5 files changed, 133 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 84b39b2d..ddca39f3 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -406,6 +406,12 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | |||
406 | case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: | 406 | case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: |
407 | new_data = g->gr.gobs_per_comptagline_per_slice; | 407 | new_data = g->gr.gobs_per_comptagline_per_slice; |
408 | break; | 408 | break; |
409 | case TYPE_PARAM_SCATTERBUFFER: | ||
410 | new_data = cde_ctx->scatterbuffer_vaddr; | ||
411 | break; | ||
412 | case TYPE_PARAM_SCATTERBUFFER_SIZE: | ||
413 | new_data = cde_ctx->scatterbuffer_size; | ||
414 | break; | ||
409 | default: | 415 | default: |
410 | user_id = param->id - NUM_RESERVED_PARAMS; | 416 | user_id = param->id - NUM_RESERVED_PARAMS; |
411 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) | 417 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) |
@@ -899,9 +905,10 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g) | |||
899 | } | 905 | } |
900 | 906 | ||
901 | int gk20a_cde_convert(struct gk20a *g, | 907 | int gk20a_cde_convert(struct gk20a *g, |
902 | struct dma_buf *compbits_buf, | 908 | struct dma_buf *compbits_scatter_buf, |
903 | s32 compbits_kind, u64 compbits_byte_offset, | 909 | u64 compbits_byte_offset, |
904 | u32 compbits_size, struct nvgpu_fence *fence, | 910 | u64 scatterbuffer_byte_offset, |
911 | struct nvgpu_fence *fence, | ||
905 | u32 __flags, struct gk20a_cde_param *params, | 912 | u32 __flags, struct gk20a_cde_param *params, |
906 | int num_params, struct gk20a_fence **fence_out) | 913 | int num_params, struct gk20a_fence **fence_out) |
907 | __acquires(&cde_app->mutex) | 914 | __acquires(&cde_app->mutex) |
@@ -909,13 +916,26 @@ __releases(&cde_app->mutex) | |||
909 | { | 916 | { |
910 | struct gk20a_cde_ctx *cde_ctx = NULL; | 917 | struct gk20a_cde_ctx *cde_ctx = NULL; |
911 | struct gk20a_comptags comptags; | 918 | struct gk20a_comptags comptags; |
912 | u64 compbits_offset = 0; | 919 | u64 mapped_compbits_offset = 0; |
920 | u64 compbits_size = 0; | ||
921 | u64 mapped_scatterbuffer_offset = 0; | ||
922 | u64 scatterbuffer_size = 0; | ||
913 | u64 map_vaddr = 0; | 923 | u64 map_vaddr = 0; |
914 | u64 map_offset = 0; | 924 | u64 map_offset = 0; |
915 | u32 map_size = 0; | 925 | u64 map_size = 0; |
926 | u8 *surface = NULL; | ||
916 | u64 big_page_mask = 0; | 927 | u64 big_page_mask = 0; |
917 | u32 flags; | 928 | u32 flags; |
918 | int err, i; | 929 | int err, i; |
930 | const s32 compbits_kind = 0; | ||
931 | |||
932 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
933 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
934 | |||
935 | /* scatter buffer must be after compbits buffer */ | ||
936 | if (scatterbuffer_byte_offset && | ||
937 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
938 | return -EINVAL; | ||
919 | 939 | ||
920 | mutex_lock(&g->cde_app.mutex); | 940 | mutex_lock(&g->cde_app.mutex); |
921 | 941 | ||
@@ -928,7 +948,7 @@ __releases(&cde_app->mutex) | |||
928 | /* First, map the buffer to local va */ | 948 | /* First, map the buffer to local va */ |
929 | 949 | ||
930 | /* ensure that the compbits buffer has drvdata */ | 950 | /* ensure that the compbits buffer has drvdata */ |
931 | err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev); | 951 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, &g->dev->dev); |
932 | if (err) | 952 | if (err) |
933 | goto exit_unlock; | 953 | goto exit_unlock; |
934 | 954 | ||
@@ -936,32 +956,88 @@ __releases(&cde_app->mutex) | |||
936 | the region to be mapped */ | 956 | the region to be mapped */ |
937 | big_page_mask = cde_ctx->vm->big_page_size - 1; | 957 | big_page_mask = cde_ctx->vm->big_page_size - 1; |
938 | map_offset = compbits_byte_offset & ~big_page_mask; | 958 | map_offset = compbits_byte_offset & ~big_page_mask; |
959 | map_size = compbits_scatter_buf->size - map_offset; | ||
960 | |||
939 | 961 | ||
940 | /* compute compbit start offset from the beginning of the mapped | 962 | /* compute compbit start offset from the beginning of the mapped |
941 | area */ | 963 | area */ |
942 | compbits_offset = compbits_byte_offset & big_page_mask; | 964 | mapped_compbits_offset = compbits_byte_offset - map_offset; |
943 | 965 | if (scatterbuffer_byte_offset) { | |
944 | if (!compbits_size) { | 966 | compbits_size = scatterbuffer_byte_offset - |
945 | compbits_size = compbits_buf->size - compbits_byte_offset; | 967 | compbits_byte_offset; |
946 | map_size = compbits_buf->size - map_offset; | 968 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - |
969 | map_offset; | ||
970 | scatterbuffer_size = compbits_scatter_buf->size - | ||
971 | scatterbuffer_byte_offset; | ||
972 | } else { | ||
973 | compbits_size = compbits_scatter_buf->size - | ||
974 | compbits_byte_offset; | ||
947 | } | 975 | } |
948 | 976 | ||
977 | gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
978 | map_offset, map_size); | ||
979 | gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
980 | mapped_compbits_offset, compbits_size); | ||
981 | gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
982 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
983 | |||
984 | |||
949 | /* map the destination buffer */ | 985 | /* map the destination buffer */ |
950 | get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */ | 986 | get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */ |
951 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0, | 987 | map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, |
952 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | 988 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
953 | compbits_kind, NULL, true, | 989 | compbits_kind, NULL, true, |
954 | gk20a_mem_flag_none, | 990 | gk20a_mem_flag_none, |
955 | map_offset, map_size, | 991 | map_offset, map_size, |
956 | NULL); | 992 | NULL); |
957 | if (!map_vaddr) { | 993 | if (!map_vaddr) { |
958 | dma_buf_put(compbits_buf); | 994 | dma_buf_put(compbits_scatter_buf); |
959 | err = -EINVAL; | 995 | err = -EINVAL; |
960 | goto exit_unlock; | 996 | goto exit_unlock; |
961 | } | 997 | } |
962 | 998 | ||
999 | if (scatterbuffer_byte_offset && | ||
1000 | g->ops.cde.need_scatter_buffer && | ||
1001 | g->ops.cde.need_scatter_buffer(g)) { | ||
1002 | struct sg_table *sgt; | ||
1003 | void *scatter_buffer; | ||
1004 | |||
1005 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1006 | if (IS_ERR(surface)) { | ||
1007 | gk20a_warn(&g->dev->dev, | ||
1008 | "dma_buf_vmap failed"); | ||
1009 | err = -EINVAL; | ||
1010 | goto exit_unlock; | ||
1011 | } | ||
1012 | |||
1013 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1014 | |||
1015 | gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1016 | surface, scatter_buffer); | ||
1017 | sgt = gk20a_mm_pin(&g->dev->dev, compbits_scatter_buf); | ||
1018 | if (IS_ERR(sgt)) { | ||
1019 | gk20a_warn(&g->dev->dev, | ||
1020 | "mm_pin failed"); | ||
1021 | err = -EINVAL; | ||
1022 | goto exit_unlock; | ||
1023 | } else { | ||
1024 | err = g->ops.cde.populate_scatter_buffer(g, sgt, | ||
1025 | compbits_byte_offset, scatter_buffer, | ||
1026 | scatterbuffer_size); | ||
1027 | WARN_ON(err); | ||
1028 | |||
1029 | gk20a_mm_unpin(&g->dev->dev, compbits_scatter_buf, | ||
1030 | sgt); | ||
1031 | if (err) | ||
1032 | goto exit_unlock; | ||
1033 | } | ||
1034 | |||
1035 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1036 | surface = NULL; | ||
1037 | } | ||
1038 | |||
963 | /* store source buffer compression tags */ | 1039 | /* store source buffer compression tags */ |
964 | gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags); | 1040 | gk20a_get_comptags(&g->dev->dev, compbits_scatter_buf, &comptags); |
965 | cde_ctx->surf_param_offset = comptags.offset; | 1041 | cde_ctx->surf_param_offset = comptags.offset; |
966 | cde_ctx->surf_param_lines = comptags.lines; | 1042 | cde_ctx->surf_param_lines = comptags.lines; |
967 | 1043 | ||
@@ -971,9 +1047,12 @@ __releases(&cde_app->mutex) | |||
971 | cde_ctx->surf_vaddr = map_vaddr; | 1047 | cde_ctx->surf_vaddr = map_vaddr; |
972 | 1048 | ||
973 | /* store information about destination */ | 1049 | /* store information about destination */ |
974 | cde_ctx->compbit_vaddr = map_vaddr + compbits_offset; | 1050 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; |
975 | cde_ctx->compbit_size = compbits_size; | 1051 | cde_ctx->compbit_size = compbits_size; |
976 | 1052 | ||
1053 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1054 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1055 | |||
977 | /* remove existing argument data */ | 1056 | /* remove existing argument data */ |
978 | memset(cde_ctx->user_param_values, 0, | 1057 | memset(cde_ctx->user_param_values, 0, |
979 | sizeof(cde_ctx->user_param_values)); | 1058 | sizeof(cde_ctx->user_param_values)); |
@@ -1002,6 +1081,8 @@ __releases(&cde_app->mutex) | |||
1002 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | 1081 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); |
1003 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | 1082 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", |
1004 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | 1083 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); |
1084 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1085 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1005 | 1086 | ||
1006 | 1087 | ||
1007 | /* take always the postfence as it is needed for protecting the | 1088 | /* take always the postfence as it is needed for protecting the |
@@ -1024,6 +1105,9 @@ exit_unlock: | |||
1024 | if (map_vaddr) | 1105 | if (map_vaddr) |
1025 | gk20a_vm_unmap(cde_ctx->vm, map_vaddr); | 1106 | gk20a_vm_unmap(cde_ctx->vm, map_vaddr); |
1026 | 1107 | ||
1108 | if (surface) | ||
1109 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1110 | |||
1027 | mutex_unlock(&g->cde_app.mutex); | 1111 | mutex_unlock(&g->cde_app.mutex); |
1028 | return err; | 1112 | return err; |
1029 | } | 1113 | } |
@@ -1266,6 +1350,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1266 | struct gk20a *g, | 1350 | struct gk20a *g, |
1267 | struct dma_buf *dmabuf, u32 consumer, | 1351 | struct dma_buf *dmabuf, u32 consumer, |
1268 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | 1352 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, |
1353 | u64 scatterbuffer_offset, | ||
1269 | u32 width, u32 height, u32 block_height_log2, | 1354 | u32 width, u32 height, u32 block_height_log2, |
1270 | u32 submit_flags, struct nvgpu_fence *fence_in, | 1355 | u32 submit_flags, struct nvgpu_fence *fence_in, |
1271 | struct gk20a_buffer_state *state) | 1356 | struct gk20a_buffer_state *state) |
@@ -1310,9 +1395,9 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1310 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | 1395 | gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", |
1311 | xtiles, ytiles); | 1396 | xtiles, ytiles); |
1312 | 1397 | ||
1313 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx", | 1398 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", |
1314 | width, height, block_height_log2, | 1399 | width, height, block_height_log2, |
1315 | compbits_hoffset, compbits_voffset); | 1400 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); |
1316 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | 1401 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", |
1317 | width, height, xtiles, ytiles); | 1402 | width, height, xtiles, ytiles); |
1318 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | 1403 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", |
@@ -1386,9 +1471,8 @@ static int gk20a_buffer_convert_gpu_to_cde_v1( | |||
1386 | #undef WRITE_PATCH | 1471 | #undef WRITE_PATCH |
1387 | 1472 | ||
1388 | err = gk20a_cde_convert(g, dmabuf, | 1473 | err = gk20a_cde_convert(g, dmabuf, |
1389 | 0, /* dst kind */ | ||
1390 | compbits_hoffset, | 1474 | compbits_hoffset, |
1391 | 0, /* dst_size, 0 = auto */ | 1475 | scatterbuffer_offset, |
1392 | fence_in, submit_flags, | 1476 | fence_in, submit_flags, |
1393 | params, param, &new_fence); | 1477 | params, param, &new_fence); |
1394 | if (err) | 1478 | if (err) |
@@ -1406,6 +1490,7 @@ out: | |||
1406 | static int gk20a_buffer_convert_gpu_to_cde( | 1490 | static int gk20a_buffer_convert_gpu_to_cde( |
1407 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, | 1491 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, |
1408 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | 1492 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, |
1493 | u64 scatterbuffer_offset, | ||
1409 | u32 width, u32 height, u32 block_height_log2, | 1494 | u32 width, u32 height, u32 block_height_log2, |
1410 | u32 submit_flags, struct nvgpu_fence *fence_in, | 1495 | u32 submit_flags, struct nvgpu_fence *fence_in, |
1411 | struct gk20a_buffer_state *state) | 1496 | struct gk20a_buffer_state *state) |
@@ -1425,7 +1510,8 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1425 | if (g->cde_app.firmware_version == 1) { | 1510 | if (g->cde_app.firmware_version == 1) { |
1426 | err = gk20a_buffer_convert_gpu_to_cde_v1( | 1511 | err = gk20a_buffer_convert_gpu_to_cde_v1( |
1427 | g, dmabuf, consumer, offset, compbits_hoffset, | 1512 | g, dmabuf, consumer, offset, compbits_hoffset, |
1428 | compbits_voffset, width, height, block_height_log2, | 1513 | compbits_voffset, scatterbuffer_offset, |
1514 | width, height, block_height_log2, | ||
1429 | submit_flags, fence_in, state); | 1515 | submit_flags, fence_in, state); |
1430 | } else { | 1516 | } else { |
1431 | dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d", | 1517 | dev_err(dev_from_gk20a(g), "unsupported CDE firmware version %d", |
@@ -1440,6 +1526,7 @@ static int gk20a_buffer_convert_gpu_to_cde( | |||
1440 | int gk20a_prepare_compressible_read( | 1526 | int gk20a_prepare_compressible_read( |
1441 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | 1527 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, |
1442 | u64 compbits_hoffset, u64 compbits_voffset, | 1528 | u64 compbits_hoffset, u64 compbits_voffset, |
1529 | u64 scatterbuffer_offset, | ||
1443 | u32 width, u32 height, u32 block_height_log2, | 1530 | u32 width, u32 height, u32 block_height_log2, |
1444 | u32 submit_flags, struct nvgpu_fence *fence, | 1531 | u32 submit_flags, struct nvgpu_fence *fence, |
1445 | u32 *valid_compbits, u32 *zbc_color, | 1532 | u32 *valid_compbits, u32 *zbc_color, |
@@ -1482,7 +1569,7 @@ int gk20a_prepare_compressible_read( | |||
1482 | g, dmabuf, | 1569 | g, dmabuf, |
1483 | missing_cde_bits, | 1570 | missing_cde_bits, |
1484 | offset, compbits_hoffset, | 1571 | offset, compbits_hoffset, |
1485 | compbits_voffset, | 1572 | compbits_voffset, scatterbuffer_offset, |
1486 | width, height, block_height_log2, | 1573 | width, height, block_height_log2, |
1487 | submit_flags, fence, | 1574 | submit_flags, fence, |
1488 | state); | 1575 | state); |
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h index a5c75ae8..fc5736ad 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A color decompression engine support | 2 | * GK20A color decompression engine support |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. | 4 | * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -129,6 +129,8 @@ enum { | |||
129 | TYPE_PARAM_SOURCE_SMMU_ADDR, | 129 | TYPE_PARAM_SOURCE_SMMU_ADDR, |
130 | TYPE_PARAM_BACKINGSTORE_BASE_HW, | 130 | TYPE_PARAM_BACKINGSTORE_BASE_HW, |
131 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, | 131 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, |
132 | TYPE_PARAM_SCATTERBUFFER, | ||
133 | TYPE_PARAM_SCATTERBUFFER_SIZE, | ||
132 | NUM_RESERVED_PARAMS = 1024, | 134 | NUM_RESERVED_PARAMS = 1024, |
133 | }; | 135 | }; |
134 | 136 | ||
@@ -237,6 +239,9 @@ struct gk20a_cde_ctx { | |||
237 | u64 compbit_vaddr; | 239 | u64 compbit_vaddr; |
238 | u64 compbit_size; | 240 | u64 compbit_size; |
239 | 241 | ||
242 | u64 scatterbuffer_vaddr; | ||
243 | u64 scatterbuffer_size; | ||
244 | |||
240 | u64 backing_store_vaddr; | 245 | u64 backing_store_vaddr; |
241 | 246 | ||
242 | struct nvgpu_gpfifo *init_convert_cmd; | 247 | struct nvgpu_gpfifo *init_convert_cmd; |
@@ -276,16 +281,19 @@ void gk20a_cde_destroy(struct gk20a *g); | |||
276 | void gk20a_cde_suspend(struct gk20a *g); | 281 | void gk20a_cde_suspend(struct gk20a *g); |
277 | int gk20a_init_cde_support(struct gk20a *g); | 282 | int gk20a_init_cde_support(struct gk20a *g); |
278 | int gk20a_cde_reload(struct gk20a *g); | 283 | int gk20a_cde_reload(struct gk20a *g); |
279 | int gk20a_cde_convert(struct gk20a *g, struct dma_buf *compbits_buf, | 284 | int gk20a_cde_convert(struct gk20a *g, |
280 | s32 compbits_kind, u64 compbits_word_offset, | 285 | struct dma_buf *compbits_buf, |
281 | u32 compbits_size, struct nvgpu_fence *fence, | 286 | u64 compbits_byte_offset, |
282 | u32 __flags, struct gk20a_cde_param *params, | 287 | u64 scatterbuffer_byte_offset, |
283 | int num_params, struct gk20a_fence **fence_out); | 288 | struct nvgpu_fence *fence, |
289 | u32 __flags, struct gk20a_cde_param *params, | ||
290 | int num_params, struct gk20a_fence **fence_out); | ||
284 | void gk20a_cde_debugfs_init(struct platform_device *dev); | 291 | void gk20a_cde_debugfs_init(struct platform_device *dev); |
285 | 292 | ||
286 | int gk20a_prepare_compressible_read( | 293 | int gk20a_prepare_compressible_read( |
287 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | 294 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, |
288 | u64 compbits_hoffset, u64 compbits_voffset, | 295 | u64 compbits_hoffset, u64 compbits_voffset, |
296 | u64 scatterbuffer_offset, | ||
289 | u32 width, u32 height, u32 block_height_log2, | 297 | u32 width, u32 height, u32 block_height_log2, |
290 | u32 submit_flags, struct nvgpu_fence *fence, | 298 | u32 submit_flags, struct nvgpu_fence *fence, |
291 | u32 *valid_compbits, u32 *zbc_color, | 299 | u32 *valid_compbits, u32 *zbc_color, |
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 88a933b8..3b5ca298 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -92,6 +92,7 @@ static int gk20a_ctrl_prepare_compressible_read( | |||
92 | ret = gk20a_prepare_compressible_read(g, args->handle, | 92 | ret = gk20a_prepare_compressible_read(g, args->handle, |
93 | args->request_compbits, args->offset, | 93 | args->request_compbits, args->offset, |
94 | args->compbits_hoffset, args->compbits_voffset, | 94 | args->compbits_hoffset, args->compbits_voffset, |
95 | args->scatterbuffer_offset, | ||
95 | args->width, args->height, args->block_height_log2, | 96 | args->width, args->height, args->block_height_log2, |
96 | flags, &fence, &args->valid_compbits, | 97 | flags, &fence, &args->valid_compbits, |
97 | &args->zbc_color, &fence_out); | 98 | &args->zbc_color, &fence_out); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 46940744..d734c21f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -439,6 +439,12 @@ struct gpu_ops { | |||
439 | void (*get_program_numbers)(struct gk20a *g, | 439 | void (*get_program_numbers)(struct gk20a *g, |
440 | u32 block_height_log2, | 440 | u32 block_height_log2, |
441 | int *hprog, int *vprog); | 441 | int *hprog, int *vprog); |
442 | bool (*need_scatter_buffer)(struct gk20a *g); | ||
443 | int (*populate_scatter_buffer)(struct gk20a *g, | ||
444 | struct sg_table *sgt, | ||
445 | size_t surface_size, | ||
446 | void *scatter_buffer_ptr, | ||
447 | size_t scatter_buffer_size); | ||
442 | } cde; | 448 | } cde; |
443 | }; | 449 | }; |
444 | 450 | ||
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index c4edd305..138d7971 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -223,7 +223,9 @@ struct nvgpu_gpu_prepare_compressible_read_args { | |||
223 | __s32 fd; | 223 | __s32 fd; |
224 | } fence; /* in/out */ | 224 | } fence; /* in/out */ |
225 | __u32 zbc_color; /* out */ | 225 | __u32 zbc_color; /* out */ |
226 | __u32 reserved[5]; /* must be zero */ | 226 | __u32 reserved; /* must be zero */ |
227 | __u64 scatterbuffer_offset; /* in, within handle */ | ||
228 | __u32 reserved2[2]; /* must be zero */ | ||
227 | }; | 229 | }; |
228 | 230 | ||
229 | struct nvgpu_gpu_mark_compressible_write_args { | 231 | struct nvgpu_gpu_mark_compressible_write_args { |