-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c  | 541
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.h  |  45
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c   |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   |   2
4 files changed, 390 insertions(+), 200 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 472cc81c..8b2ed55e 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -337,8 +337,8 @@ static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
 }
 
 static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
-                                  const struct firmware *img,
-                                  struct gk20a_cde_hdr_replace *replace)
+                                  const struct firmware *img,
+                                  struct gk20a_cde_hdr_replace *replace)
 {
     struct gk20a_cde_mem_desc *source_mem;
     struct gk20a_cde_mem_desc *target_mem;
@@ -410,26 +410,26 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
             g->gr.cacheline_size;
         break;
     case TYPE_PARAM_FIRSTPAGEOFFSET:
-        new_data = cde_ctx->src_param_offset;
+        new_data = cde_ctx->surf_param_offset;
         break;
     case TYPE_PARAM_NUMPAGES:
-        new_data = cde_ctx->src_param_lines;
+        new_data = cde_ctx->surf_param_lines;
         break;
     case TYPE_PARAM_BACKINGSTORE:
         new_data = cde_ctx->backing_store_vaddr;
         break;
     case TYPE_PARAM_DESTINATION:
-        new_data = cde_ctx->dest_vaddr;
+        new_data = cde_ctx->compbit_vaddr;
         break;
     case TYPE_PARAM_DESTINATION_SIZE:
-        new_data = cde_ctx->dest_size;
+        new_data = cde_ctx->compbit_size;
         break;
     case TYPE_PARAM_BACKINGSTORE_SIZE:
         new_data = g->gr.compbit_store.size;
         break;
     case TYPE_PARAM_SOURCE_SMMU_ADDR:
-        new_data = gk20a_mm_gpuva_to_iova(cde_ctx->vm,
-                                          cde_ctx->src_vaddr);
+        new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
+                                               cde_ctx->surf_vaddr);
         if (new_data == 0)
             err = -EINVAL;
         break;
@@ -605,8 +605,9 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
                               const struct firmware *img)
 {
+    struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
     u32 *data = (u32 *)img->data;
-    u32 version, num_of_elems;
+    u32 num_of_elems;
     struct gk20a_cde_hdr_elem *elem;
     u32 min_size = 0;
     int err = 0;
@@ -618,7 +619,7 @@ static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
         return -EINVAL;
     }
 
-    version = data[0];
+    cde_app->firmware_version = data[0];
     num_of_elems = data[1];
 
     min_size += num_of_elems * sizeof(*elem);
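The two hunks above change gk20a_init_cde_img() so that word 0 of the firmware blob is kept in cde_app->firmware_version instead of being parsed into a discarded local. As a reference, the blob layout the parser assumes can be restated as a small standalone C sketch (cde_blob_hdr and parse_cde_blob are hypothetical names for illustration, not driver code):

```c
/* Sketch of the CDE firmware blob layout assumed by gk20a_init_cde_img():
 * word 0 is the firmware version, word 1 the element count, and the
 * element table follows. */
#include <stdint.h>
#include <stddef.h>

struct cde_blob_hdr {
	uint32_t version;      /* data[0]: 0 = legacy two-launch, 1+ = combined */
	uint32_t num_of_elems; /* data[1]: number of header elements that follow */
};

static int parse_cde_blob(const uint8_t *data, size_t size,
			  struct cde_blob_hdr *hdr)
{
	if (size < sizeof(*hdr))
		return -1; /* the driver returns -EINVAL in this case */
	hdr->version = ((const uint32_t *)data)[0];
	hdr->num_of_elems = ((const uint32_t *)data)[1];
	/* a real parser must also check size against
	 * num_of_elems * sizeof(elem), as min_size does in the diff */
	return 0;
}
```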
@@ -654,6 +655,11 @@ static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
                     elem->command.num_entries);
         break;
     }
+    case TYPE_ARRAY:
+        memcpy(&cde_app->arrays[elem->array.id][0],
+               elem->array.data,
+               MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
+        break;
     default:
         gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown header element");
         err = -EINVAL;
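The new TYPE_ARRAY case copies a fixed-size payload into cde_app->arrays at a firmware-supplied index. The patch copies unconditionally; a defensive variant would range-check the id first. A self-contained sketch of that variant (copy_cde_array is a hypothetical helper; the constants mirror the cde_gk20a.h hunk further down, and the bounds check is an addition, not part of the patch):

```c
#include <stdint.h>
#include <string.h>

#define MAX_CDE_ARRAY_ENTRIES 9
enum { ARRAY_PROGRAM_OFFSET, ARRAY_REGISTER_COUNT, ARRAY_LAUNCH_COMMAND,
       NUM_CDE_ARRAYS };

static int copy_cde_array(uint32_t dst[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES],
			  uint32_t id,
			  const uint32_t src[MAX_CDE_ARRAY_ENTRIES])
{
	if (id >= NUM_CDE_ARRAYS)
		return -1; /* reject an out-of-range firmware-supplied index */
	memcpy(dst[id], src, MAX_CDE_ARRAY_ENTRIES * sizeof(uint32_t));
	return 0;
}
```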
@@ -853,27 +859,25 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 }
 
 int gk20a_cde_convert(struct gk20a *g,
-                      struct dma_buf *dst,
-                      s32 dst_kind, u64 dst_byte_offset,
-                      u32 dst_size, struct nvgpu_fence *fence,
+                      struct dma_buf *compbits_buf,
+                      s32 compbits_kind, u64 compbits_byte_offset,
+                      u32 compbits_size, struct nvgpu_fence *fence,
                       u32 __flags, struct gk20a_cde_param *params,
                       int num_params, struct gk20a_fence **fence_out)
 __acquires(&cde_app->mutex)
 __releases(&cde_app->mutex)
 {
-    struct gk20a_cde_app *cde_app = &g->cde_app;
+    struct gk20a_cde_ctx *cde_ctx = NULL;
     struct gk20a_comptags comptags;
-    struct gk20a_cde_ctx *cde_ctx;
-    u64 dst_vaddr = 0;
+    u64 compbits_offset = 0;
+    u64 map_vaddr = 0;
+    u64 map_offset = 0;
+    u32 map_size = 0;
+    u64 big_page_mask = 0;
     u32 flags;
     int err, i;
 
-    if (!cde_app->initialised) {
-        gk20a_warn(&g->dev->dev, "cde: conversion requrest but no image has been provided");
-        return -ENOSYS;
-    }
-
-    mutex_lock(&cde_app->mutex);
+    mutex_lock(&g->cde_app.mutex);
 
     cde_ctx = gk20a_cde_get_context(g);
     if (IS_ERR(cde_ctx)) {
@@ -881,38 +885,53 @@ __releases(&cde_app->mutex)
         goto exit_unlock;
     }
 
-    /* First, map the buffers to local va */
+    /* First, map the buffer to local va */
 
-    /* ensure that the dst buffer has drvdata */
-    err = gk20a_dmabuf_alloc_drvdata(dst, &g->dev->dev);
+    /* ensure that the compbits buffer has drvdata */
+    err = gk20a_dmabuf_alloc_drvdata(compbits_buf, &g->dev->dev);
     if (err)
         goto exit_unlock;
 
+    /* compbits don't start at page aligned offset, so we need to align
+       the region to be mapped */
+    big_page_mask = cde_ctx->vm->big_page_size - 1;
+    map_offset = compbits_byte_offset & ~big_page_mask;
+
+    /* compute compbit start offset from the beginning of the mapped
+       area */
+    compbits_offset = compbits_byte_offset & big_page_mask;
+
+    if (!compbits_size) {
+        compbits_size = compbits_buf->size - compbits_byte_offset;
+        map_size = compbits_buf->size - map_offset;
+    }
+
     /* map the destination buffer */
-    get_dma_buf(dst); /* a ref for gk20a_vm_map */
-    dst_vaddr = gk20a_vm_map(cde_ctx->vm, dst, 0,
+    get_dma_buf(compbits_buf); /* a ref for gk20a_vm_map */
+    map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_buf, 0,
                              NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-                             dst_kind, NULL, true,
+                             compbits_kind, NULL, true,
                              gk20a_mem_flag_none,
-                             0, 0);
-    if (!dst_vaddr) {
-        dma_buf_put(dst);
+                             map_offset, map_size);
+    if (!map_vaddr) {
+        dma_buf_put(compbits_buf);
         err = -EINVAL;
         goto exit_unlock;
     }
 
-    if (!dst_size)
-        dst_size = dst->size - dst_byte_offset;
-
     /* store source buffer compression tags */
-    gk20a_get_comptags(&g->dev->dev, dst, &comptags);
-    cde_ctx->src_vaddr = dst_vaddr;
-    cde_ctx->src_param_offset = comptags.offset;
-    cde_ctx->src_param_lines = comptags.lines;
+    gk20a_get_comptags(&g->dev->dev, compbits_buf, &comptags);
+    cde_ctx->surf_param_offset = comptags.offset;
+    cde_ctx->surf_param_lines = comptags.lines;
+
+    /* store surface vaddr. This is actually compbit vaddr, but since
+       compbits live in the same surface, and we can get the alloc base
+       address by using gk20a_mm_gpuva_to_iova_base, this will do */
+    cde_ctx->surf_vaddr = map_vaddr;
 
     /* store information about destination */
-    cde_ctx->dest_vaddr = dst_vaddr + dst_byte_offset;
-    cde_ctx->dest_size = dst_size;
+    cde_ctx->compbit_vaddr = map_vaddr + compbits_offset;
+    cde_ctx->compbit_size = compbits_size;
 
     /* remove existing argument data */
     memset(cde_ctx->user_param_values, 0,
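The new mapping code above aligns the mapped window down to a big-page boundary and keeps the remainder as the compbit offset inside that window, so map_offset + compbits_offset always reproduces compbits_byte_offset. A worked example of the arithmetic (the 128 KiB big-page size is assumed for illustration; the driver reads it from cde_ctx->vm->big_page_size):

```c
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int main(void)
{
	const uint64_t big_page_size = 128 << 10;          /* assumed */
	const uint64_t big_page_mask = big_page_size - 1;
	const uint64_t compbits_byte_offset = 0x52345;     /* arbitrary example */

	/* start of the GPU mapping: aligned down to a big-page boundary */
	uint64_t map_offset = compbits_byte_offset & ~big_page_mask;
	/* where the compbits begin inside that mapped window */
	uint64_t compbits_offset = compbits_byte_offset & big_page_mask;

	assert(map_offset + compbits_offset == compbits_byte_offset);
	printf("map_offset=0x%llx compbits_offset=0x%llx\n",
	       (unsigned long long)map_offset,     /* 0x40000 */
	       (unsigned long long)compbits_offset /* 0x12345 */);
	return 0;
}
```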
@@ -940,8 +959,8 @@ __releases(&cde_app->mutex)
 
     gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
               g->gr.compbit_store.size, cde_ctx->backing_store_vaddr);
-    gk20a_dbg(gpu_dbg_cde, "cde: buffer=dst, size=%llu, gpuva=%llx\n",
-              cde_ctx->dest_size, cde_ctx->dest_vaddr);
+    gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
+              cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
 
     /* execute the init push buffer */
     if (!cde_ctx->init_cmd_executed) {
@@ -964,11 +983,10 @@ __releases(&cde_app->mutex)
 exit_unlock:
 
     /* unmap the buffers - channel holds references to them now */
-    if (dst_vaddr)
-        gk20a_vm_unmap(cde_ctx->vm, dst_vaddr);
-
-    mutex_unlock(&cde_app->mutex);
+    if (map_vaddr)
+        gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
 
+    mutex_unlock(&g->cde_app.mutex);
     return err;
 }
 
@@ -1159,152 +1177,322 @@ __releases(&cde_app->mutex)
     return err;
 }
 
-enum cde_launch_patch_offset {
-    /* dst buffer width in roptiles */
-    PATCH_USER_CONST_XTILES,
-    /* dst buffer height in roptiles */
-    PATCH_USER_CONST_YTILES,
-    /* dst buffer log2(block height) */
-    PATCH_USER_CONST_BLOCKHEIGHTLOG2,
-    /* dst buffer pitch in bytes */
-    PATCH_USER_CONST_DSTPITCH,
-    /* dst buffer write offset */
-    PATCH_USER_CONST_DSTOFFSET,
-    /* comp cache index of the first page of the surface,
-     * kernel looks it up from PTE */
-    PATCH_USER_CONST_FIRSTPAGEOFFSET,
-    /* gmmu translated surface address, kernel fills */
-    PATCH_USER_CONST_SURFADDR,
-    /* dst buffer address >> 8, kernel fills */
-    PATCH_VPC_DSTIMAGE_ADDR,
-    /* dst buffer address >> 8, kernel fills */
-    PATCH_VPC_DSTIMAGE_ADDR2,
-    /* dst buffer size - 1, kernel fills */
-    PATCH_VPC_DSTIMAGE_SIZE_MINUS_ONE,
-    /* dst buffer size - 1, kernel fills */
-    PATCH_VPC_DSTIMAGE_SIZE_MINUS_ONE2,
-    /* dst buffer size, kernel fills */
-    PATCH_VPC_DSTIMAGE_SIZE,
-    /* dst buffer width in roptiles / work group width */
-    PATCH_VPC_CURRENT_GRID_SIZE_X,
-    /* dst buffer height in roptiles / work group height */
-    PATCH_VPC_CURRENT_GRID_SIZE_Y,
-    /* 1 */
-    PATCH_VPC_CURRENT_GRID_SIZE_Z,
-    /* work group width, 16 seems to be quite optimal */
-    PATCH_VPC_CURRENT_GROUP_SIZE_X,
-    /* work group height, 8 seems to be quite optimal */
-    PATCH_VPC_CURRENT_GROUP_SIZE_Y,
-    /* 1 */
-    PATCH_VPC_CURRENT_GROUP_SIZE_Z,
-    /* same as PATCH_VPC_CURRENT_GRID_SIZE_X */
-    PATCH_QMD_CTA_RASTER_WIDTH,
-    /* same as PATCH_VPC_CURRENT_GRID_SIZE_Y */
-    PATCH_QMD_CTA_RASTER_HEIGHT,
-    /* same as PATCH_VPC_CURRENT_GRID_SIZE_Z */
-    PATCH_QMD_CTA_RASTER_DEPTH,
-    /* same as PATCH_VPC_CURRENT_GROUP_SIZE_X */
-    PATCH_QMD_CTA_THREAD_DIMENSION0,
-    /* same as PATCH_VPC_CURRENT_GROUP_SIZE_Y */
-    PATCH_QMD_CTA_THREAD_DIMENSION1,
-    /* same as PATCH_VPC_CURRENT_GROUP_SIZE_Z */
-    PATCH_QMD_CTA_THREAD_DIMENSION2,
-
-    NUM_CDE_LAUNCH_PATCHES
-};
-
 enum cde_launch_patch_id {
-    PATCH_QMD_CTA_RASTER_WIDTH_ID = 1024,
-    PATCH_QMD_CTA_RASTER_HEIGHT_ID = 1025,
-    PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026,
+    PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
+    PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
+    PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
     PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
     PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
-    PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029,
-    PATCH_USER_CONST_XTILES_ID = 1030,
-    PATCH_USER_CONST_YTILES_ID = 1031,
+    PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
+    PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
+    PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
     PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
-    PATCH_USER_CONST_DSTPITCH_ID = 1033,
-    PATCH_USER_CONST_DSTOFFSET_ID = 1034,
-    PATCH_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
-    PATCH_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
-    PATCH_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
+    PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
+    PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
+    PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
+    PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
+    PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
     PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
     PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
    PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
+    PATCH_USER_CONST_XBLOCKS_ID = 1041,
+    PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
+    PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
+    PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
+    PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
+    PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
+    PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
+    PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
+    PATCH_H_LAUNCH_WORD1_ID = 1049,
+    PATCH_H_LAUNCH_WORD2_ID = 1050,
+    PATCH_V_LAUNCH_WORD1_ID = 1051,
+    PATCH_V_LAUNCH_WORD2_ID = 1052,
+    PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
+    PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
+    PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
+    PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
 };
 
-static int gk20a_buffer_convert_gpu_to_cde(
-    struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
-    u64 offset, u64 compbits_offset,
+enum programs {
+    PROG_HPASS = 0,
+    PROG_VPASS_LARGE = 1,
+    PROG_VPASS_SMALL = 2,
+    PROG_HPASS_DEBUG = 3,
+    PROG_VPASS_LARGE_DEBUG = 4,
+    PROG_VPASS_SMALL_DEBUG = 5,
+    PROG_PASSTHROUGH = 6,
+    NUM_PROGRAMS = 7
+};
+
+/* maximum number of WRITE_PATCHes in the below function */
+#define MAX_CDE_LAUNCH_PATCHES 32
+
+static int gk20a_buffer_convert_gpu_to_cde_v0(
+    struct gk20a *g,
+    struct dma_buf *dmabuf, u32 consumer,
+    u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
     u32 width, u32 height, u32 block_height_log2,
     u32 submit_flags, struct nvgpu_fence *fence_in,
-    struct gk20a_fence **fence_out)
+    struct gk20a_buffer_state *state)
 {
-    struct gk20a_cde_param params[NUM_CDE_LAUNCH_PATCHES];
+    struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
     int param = 0;
     int err = 0;
+    struct gk20a_fence *new_fence = NULL;
+    const int wgx = 8;
+    const int wgy = 8;
+    const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
+    const int xalign = compbits_per_byte * wgx;
+    const int yalign = wgy;
 
-    /* Compute per launch parameters */
-    const bool transpose = (consumer == NVGPU_GPU_COMPBITS_CDEV);
-    const int transposed_width = transpose ? height : width;
-    const int transposed_height = transpose ? width : height;
-    const int xtiles = (transposed_width + 7) >> 3;
-    const int ytiles = (transposed_height + 7) >> 3;
+    /* firmware v0 needs to call swizzling twice */
+    int i;
+    for (i = 0; i < 2; i++) {
+        /* Compute per launch parameters */
+        const bool vpass = (i == 1);
+        const int transposed_width = vpass ? height : width;
+        const int transposed_height = vpass ? width : height;
+        const int xtiles = (transposed_width + 7) >> 3;
+        const int ytiles = (transposed_height + 7) >> 3;
+        const int gridw = roundup(xtiles, xalign) / xalign;
+        const int gridh = roundup(ytiles, yalign) / yalign;
+        const int flags = (vpass ? 4 : 0) |
+            g->cde_app.shader_parameter;
+        const int dst_stride = 128; /* chip constant */
+
+        if ((vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEV)) ||
+            (!vpass && !(consumer & NVGPU_GPU_COMPBITS_CDEH)))
+            continue;
+
+        if (xtiles > 4096 / 8 || ytiles > 4096 / 8)
+            gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
+                       xtiles, ytiles);
+
+        gk20a_dbg(gpu_dbg_cde, "pass=%c", vpass ? 'V' : 'H');
+        gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
+                  width, height, block_height_log2,
+                  compbits_hoffset, compbits_voffset);
+        gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
+                  width, height, xtiles, ytiles);
+        gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)",
+                  wgx, wgy, gridw, gridh);
+
+        /* Write parameters */
+#define WRITE_PATCH(NAME, VALUE) \
+    params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
+        param = 0;
+        WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles);
+        WRITE_PATCH(PATCH_USER_CONST_YTILES, ytiles);
+        WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
+                    block_height_log2);
+        WRITE_PATCH(PATCH_USER_CONST_DSTPITCH, dst_stride);
+        WRITE_PATCH(PATCH_H_USER_CONST_FLAGS, flags);
+        WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw);
+        WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh);
+        WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
+        WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
+        WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
+        WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
+        WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw);
+        WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh);
+        WRITE_PATCH(PATCH_QMD_CTA_RASTER_DEPTH, 1);
+        WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
+        WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
+        WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION2, 1);
+#undef WRITE_PATCH
+
+        err = gk20a_cde_convert(g, dmabuf,
+                                0, /* dst kind */
+                                vpass ?
+                                compbits_voffset :
+                                compbits_hoffset,
+                                0, /* dst_size, 0 = auto */
+                                fence_in, submit_flags,
+                                params, param,
+                                &new_fence);
+        if (err)
+            goto out;
+
+        /* compbits generated, update state & fence */
+        gk20a_fence_put(state->fence);
+        state->fence = new_fence;
+        state->valid_compbits |= vpass ?
+            NVGPU_GPU_COMPBITS_CDEV :
+            NVGPU_GPU_COMPBITS_CDEH;
+    }
+out:
+    return err;
+}
+
+static int gk20a_buffer_convert_gpu_to_cde_v1(
+    struct gk20a *g,
+    struct dma_buf *dmabuf, u32 consumer,
+    u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+    u32 width, u32 height, u32 block_height_log2,
+    u32 submit_flags, struct nvgpu_fence *fence_in,
+    struct gk20a_buffer_state *state)
+{
+    struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
+    int param = 0;
+    int err = 0;
+    struct gk20a_fence *new_fence = NULL;
     const int wgx = 8;
     const int wgy = 8;
     const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
-    const int dst_stride = 128; /* TODO chip constant */
     const int xalign = compbits_per_byte * wgx;
     const int yalign = wgy;
-    const int gridw = roundup(xtiles, xalign) / xalign;
-    const int gridh = roundup(ytiles, yalign) / yalign;
 
-    if (!g->cde_app.initialised)
-        return -ENOSYS;
+    /* Compute per launch parameters */
+    const int xtiles = (width + 7) >> 3;
+    const int ytiles = (height + 7) >> 3;
+    const int gridw_h = roundup(xtiles, xalign) / xalign;
+    const int gridh_h = roundup(ytiles, yalign) / yalign;
+    const int gridw_v = roundup(ytiles, xalign) / xalign;
+    const int gridh_v = roundup(xtiles, yalign) / yalign;
+    const int xblocks = (xtiles + 1) >> 1;
+    const int voffset = compbits_voffset - compbits_hoffset;
+
+    int hprog = PROG_HPASS;
+    int vprog = (block_height_log2 >= 2) ?
+        PROG_VPASS_LARGE : PROG_VPASS_SMALL;
+    if (g->cde_app.shader_parameter == 1) {
+        hprog = PROG_PASSTHROUGH;
+        vprog = PROG_PASSTHROUGH;
+    } else if (g->cde_app.shader_parameter == 2) {
+        hprog = PROG_HPASS_DEBUG;
+        vprog = (block_height_log2 >= 2) ?
+            PROG_VPASS_LARGE_DEBUG :
+            PROG_VPASS_SMALL_DEBUG;
+    }
 
     if (xtiles > 4096 / 8 || ytiles > 4096 / 8)
         gk20a_warn(&g->dev->dev, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
                    xtiles, ytiles);
 
-    gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_offset=0x%llx",
-              width, height, block_height_log2, compbits_offset);
-    gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d) invocations (%d, %d)",
-              width, height, xtiles, ytiles, gridw*wgx, gridh*wgy);
-    gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)",
-              wgx, wgy, gridw, gridh);
+    gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx",
+              width, height, block_height_log2,
+              compbits_hoffset, compbits_voffset);
+    gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
+              width, height, xtiles, ytiles);
+    gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
+              wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
+    gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
+              hprog,
+              g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
+              g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
+              vprog,
+              g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
+              g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
 
     /* Write parameters */
 #define WRITE_PATCH(NAME, VALUE) \
     params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
-    WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles);
-    WRITE_PATCH(PATCH_USER_CONST_YTILES, ytiles);
-    WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, block_height_log2);
-    WRITE_PATCH(PATCH_USER_CONST_DSTPITCH, dst_stride);
-    WRITE_PATCH(PATCH_USER_CONST_DSTOFFSET,
-                (transpose ? 4 : 0) | g->cde_app.shader_parameter);
-    WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_X, gridw);
-    WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_Y, gridh);
-    WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_Z, 1);
+    WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
+    WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
+                block_height_log2);
+    WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
+    WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
     WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
     WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
     WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
-    WRITE_PATCH(PATCH_QMD_CTA_RASTER_WIDTH, gridw);
-    WRITE_PATCH(PATCH_QMD_CTA_RASTER_HEIGHT, gridh);
-    WRITE_PATCH(PATCH_QMD_CTA_RASTER_DEPTH, 1);
-    WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
-    WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
-    WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION2, 1);
+
+    WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
+    WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
+    WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
+    WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
+    WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
+    WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
+
+    WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
+    WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
+    WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
+    WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
+    WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
+    WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
+
+    WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
+                g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
+    WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
+                g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
+    WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
+                g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
+    WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
+                g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
+
+    if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
+        WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
+        WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
+    } else {
+        WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
+        WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
+    }
+
+    if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
+        WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
+        WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
+    } else {
+        WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
+        WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
+                    g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
+    }
 #undef WRITE_PATCH
 
-    err = gk20a_busy(g->dev);
-    if (err)
-        return err;
     err = gk20a_cde_convert(g, dmabuf,
                             0, /* dst kind */
-                            compbits_offset,
+                            compbits_hoffset,
                             0, /* dst_size, 0 = auto */
                             fence_in, submit_flags,
-                            params, param, fence_out);
+                            params, param, &new_fence);
+    if (err)
+        goto out;
+
+    /* compbits generated, update state & fence */
+    gk20a_fence_put(state->fence);
+    state->fence = new_fence;
+    state->valid_compbits |= consumer &
+        (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
+out:
+    return err;
+}
+
+static int gk20a_buffer_convert_gpu_to_cde(
+    struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
+    u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
+    u32 width, u32 height, u32 block_height_log2,
+    u32 submit_flags, struct nvgpu_fence *fence_in,
+    struct gk20a_buffer_state *state)
+{
+    int err = 0;
+
+    if (!g->cde_app.initialised)
+        return -ENOSYS;
+
+    err = gk20a_busy(g->dev);
+    if (err)
+        return err;
+
+    gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
+              g->cde_app.firmware_version);
+
+    if (g->cde_app.firmware_version == 0) {
+        err = gk20a_buffer_convert_gpu_to_cde_v0(
+            g, dmabuf, consumer, offset, compbits_hoffset,
+            compbits_voffset, width, height, block_height_log2,
+            submit_flags, fence_in, state);
+    } else {
+        err = gk20a_buffer_convert_gpu_to_cde_v1(
+            g, dmabuf, consumer, offset, compbits_hoffset,
+            compbits_voffset, width, height, block_height_log2,
+            submit_flags, fence_in, state);
+    }
+
     gk20a_idle(g->dev);
     return err;
 }
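Both converter paths size the compute grid by rounding the ROP-tile counts up to workgroup-aligned multiples; the v1 path additionally precomputes a vertical-pass grid by swapping the roles of xtiles and ytiles. A standalone sketch of that arithmetic with illustrative numbers (a 1920x1080 surface; roundup_int stands in for the kernel's roundup() for positive ints):

```c
#include <stdio.h>

static int roundup_int(int x, int align)
{
	return ((x + align - 1) / align) * align;
}

int main(void)
{
	const int width = 1920, height = 1080;   /* example surface */
	const int wgx = 8, wgy = 8;
	const int compbits_per_byte = 4;
	const int xalign = compbits_per_byte * wgx;  /* 32 */
	const int yalign = wgy;                      /* 8 */

	const int xtiles = (width + 7) >> 3;     /* 240 (8-pixel ROP tiles) */
	const int ytiles = (height + 7) >> 3;    /* 135 */

	/* horizontal pass covers xtiles x ytiles; vertical pass swaps them */
	const int gridw_h = roundup_int(xtiles, xalign) / xalign; /* 8 */
	const int gridh_h = roundup_int(ytiles, yalign) / yalign; /* 17 */
	const int gridw_v = roundup_int(ytiles, xalign) / xalign; /* 5 */
	const int gridh_v = roundup_int(xtiles, yalign) / yalign; /* 30 */

	printf("H grid %dx%d, V grid %dx%d\n",
	       gridw_h, gridh_h, gridw_v, gridh_v);
	return 0;
}
```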
@@ -1326,7 +1514,8 @@ int gk20a_prepare_compressible_read(
     if (IS_ERR(dmabuf))
         return -EINVAL;
 
-    err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state);
+    err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g),
+                                 offset, &state);
     if (err) {
         dma_buf_put(dmabuf);
         return err;
@@ -1345,40 +1534,20 @@ int gk20a_prepare_compressible_read(
         err = -EINVAL;
         goto out;
     } else if (missing_bits) {
-        struct gk20a_fence *new_fence = NULL;
+        u32 missing_cde_bits = missing_bits &
+            (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
         if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
-            (missing_bits & NVGPU_GPU_COMPBITS_CDEH)) {
+            missing_cde_bits) {
             err = gk20a_buffer_convert_gpu_to_cde(
                 g, dmabuf,
-                NVGPU_GPU_COMPBITS_CDEH,
+                missing_cde_bits,
                 offset, compbits_hoffset,
+                compbits_voffset,
                 width, height, block_height_log2,
                 submit_flags, fence,
-                &new_fence);
+                state);
             if (err)
                 goto out;
-
-            /* CDEH bits generated, update state & fence */
-            gk20a_fence_put(state->fence);
-            state->fence = new_fence;
-            state->valid_compbits |= NVGPU_GPU_COMPBITS_CDEH;
-        }
-        if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
-            (missing_bits & NVGPU_GPU_COMPBITS_CDEV)) {
-            err = gk20a_buffer_convert_gpu_to_cde(
-                g, dmabuf,
-                NVGPU_GPU_COMPBITS_CDEV,
-                offset, compbits_voffset,
-                width, height, block_height_log2,
-                submit_flags, fence,
-                &new_fence);
-            if (err)
-                goto out;
-
-            /* CDEH bits generated, update state & fence */
-            gk20a_fence_put(state->fence);
-            state->fence = new_fence;
-            state->valid_compbits |= NVGPU_GPU_COMPBITS_CDEV;
-        }
+        }
     }
 
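The WRITE_PATCH macro used throughout both converters pastes the patch name together with the _ID suffix to pick the enum value, and appends a C99 compound literal to the params array. An isolated illustration of the idiom (cde_param is a stand-in; the real gk20a_cde_param field layout is only inferred from this usage, and the macro relies on params/param being in scope):

```c
#include <stdio.h>

enum { PATCH_USER_CONST_XBLOCKS_ID = 1041 };

struct cde_param {          /* illustrative stand-in for gk20a_cde_param */
	unsigned int id;    /* which firmware patch slot to fill */
	unsigned int padding;
	unsigned int value;
};

/* NAME##_ID turns e.g. PATCH_USER_CONST_XBLOCKS into its _ID enum value */
#define WRITE_PATCH(NAME, VALUE) \
	params[param++] = (struct cde_param){NAME##_ID, 0, VALUE}

int main(void)
{
	struct cde_param params[4];
	int param = 0;

	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, 120);

	printf("id=%u value=%u count=%d\n",
	       params[0].id, params[0].value, param); /* id=1041 value=120 count=1 */
	return 0;
}
```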
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index 3347490c..b160162c 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -23,8 +23,9 @@
 
 #define MAX_CDE_BUFS 10
 #define MAX_CDE_PARAMS 64
-#define MAX_CDE_USER_PARAMS 32
+#define MAX_CDE_USER_PARAMS 40
 #define MAX_CDE_OBJ_IDS 4
+#define MAX_CDE_ARRAY_ENTRIES 9
 
 /*
  * The size of the context ring buffer that is dedicated for handling cde
@@ -162,6 +163,22 @@ struct gk20a_cde_cmd_elem {
 };
 
 /*
+ * This element is used for storing a small array of data.
+ */
+
+enum {
+    ARRAY_PROGRAM_OFFSET = 0,
+    ARRAY_REGISTER_COUNT,
+    ARRAY_LAUNCH_COMMAND,
+    NUM_CDE_ARRAYS
+};
+
+struct gk20a_cde_hdr_array {
+    u32 id;
+    u32 data[MAX_CDE_ARRAY_ENTRIES];
+};
+
+/*
  * Following defines a single header element. Each element has a type and
  * some of the data structures.
  */
@@ -175,6 +192,7 @@ struct gk20a_cde_hdr_elem {
         struct gk20a_cde_hdr_param param;
         u32 required_class;
         struct gk20a_cde_hdr_command command;
+        struct gk20a_cde_hdr_array array;
     };
 };
 
@@ -183,7 +201,8 @@ enum {
     TYPE_REPLACE,
     TYPE_PARAM,
     TYPE_REQUIRED_CLASS,
-    TYPE_COMMAND
+    TYPE_COMMAND,
+    TYPE_ARRAY
 };
 
 struct gk20a_cde_mem_desc {
@@ -219,14 +238,12 @@ struct gk20a_cde_ctx {
     /* storage for user space parameter values */
     u32 user_param_values[MAX_CDE_USER_PARAMS];
 
-    u64 src_smmu_addr;
-    u32 src_param_offset;
-    u32 src_param_lines;
+    u32 surf_param_offset;
+    u32 surf_param_lines;
+    u64 surf_vaddr;
 
-    u64 src_vaddr;
-
-    u64 dest_vaddr;
-    u64 dest_size;
+    u64 compbit_vaddr;
+    u64 compbit_size;
 
     u32 obj_ids[MAX_CDE_OBJ_IDS];
     int num_obj_ids;
@@ -259,6 +276,10 @@ struct gk20a_cde_app {
     int ctx_usecount;
     int ctx_count_top;
 
+    u32 firmware_version;
+
+    u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
+
     u32 shader_parameter;
 };
 
@@ -266,9 +287,9 @@ void gk20a_cde_destroy(struct gk20a *g);
 void gk20a_cde_suspend(struct gk20a *g);
 int gk20a_init_cde_support(struct gk20a *g);
 int gk20a_cde_reload(struct gk20a *g);
-int gk20a_cde_convert(struct gk20a *g, struct dma_buf *dst,
-                      s32 dst_kind, u64 dst_word_offset,
-                      u32 dst_size, struct nvgpu_fence *fence,
+int gk20a_cde_convert(struct gk20a *g, struct dma_buf *compbits_buf,
+                      s32 compbits_kind, u64 compbits_word_offset,
+                      u32 compbits_size, struct nvgpu_fence *fence,
                       u32 __flags, struct gk20a_cde_param *params,
                       int num_params, struct gk20a_fence **fence_out);
 void gk20a_cde_debugfs_init(struct platform_device *dev);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index a390e36b..08dd41c5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1546,7 +1546,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
     return vaddr;
 }
 
-dma_addr_t gk20a_mm_gpuva_to_iova(struct vm_gk20a *vm, u64 gpu_vaddr)
+dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 {
     struct mapped_buffer_node *buffer;
     dma_addr_t addr = 0;
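The rename to gk20a_mm_gpuva_to_iova_base makes the contract explicit: given any GPU VA inside a mapping, the helper returns the IOVA of that mapping's base, not of the queried address. That is why gk20a_cde_convert() can store the mapping base in surf_vaddr and later recover the surface's backing address from it. A minimal model of that contract (the struct and linear lookup are illustrative; the driver walks its mapped-buffer tree):

```c
#include <stdint.h>
#include <stddef.h>

struct mapping {
	uint64_t gpu_va;    /* start of the GPU VA range */
	uint64_t size;
	uint64_t iova_base; /* DMA/IOVA address of the backing memory */
};

static uint64_t gpuva_to_iova_base(const struct mapping *maps, size_t n,
				   uint64_t gpu_vaddr)
{
	size_t i;
	for (i = 0; i < n; i++) {
		if (gpu_vaddr >= maps[i].gpu_va &&
		    gpu_vaddr < maps[i].gpu_va + maps[i].size)
			return maps[i].iova_base; /* base, not base + offset */
	}
	return 0; /* not found; the caller in cde_gk20a.c maps 0 to -EINVAL */
}
```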
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 3f7042ee..efed79f8 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -530,7 +530,7 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
 void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
                         struct gk20a_comptags *comptags);
-dma_addr_t gk20a_mm_gpuva_to_iova(struct vm_gk20a *vm, u64 gpu_vaddr);
+dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
 
 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
 