diff options
author | Lauri Peltonen <lpeltonen@nvidia.com> | 2014-07-18 09:02:23 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:44 -0400 |
commit | 574ee40e51bf3f4fe989f8e572e611ae4ffa0795 (patch) | |
tree | 4083fb74ed6861d679299131f3577c09c33ff99d /drivers/gpu/nvgpu | |
parent | c8faa10d1dc9bb0c4c2815c38fb71d8acdd1108d (diff) |
gpu: nvgpu: Add compression state IOCTLs
Bug 1409151
Change-Id: I29a325d7c2b481764fc82d945795d50bcb841961
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 335 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 76 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 1 |
4 files changed, 399 insertions, 25 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index d01426be..46568879 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/nvhost.h> | 19 | #include <linux/nvhost.h> |
20 | #include <linux/dma-mapping.h> | 20 | #include <linux/dma-mapping.h> |
21 | #include <linux/firmware.h> | 21 | #include <linux/firmware.h> |
22 | #include <linux/fs.h> | ||
22 | #include <linux/debugfs.h> | 23 | #include <linux/debugfs.h> |
23 | #include <linux/dma-buf.h> | 24 | #include <linux/dma-buf.h> |
24 | 25 | ||
@@ -596,7 +597,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | |||
596 | num_entries, flags, fence, fence_out); | 597 | num_entries, flags, fence, fence_out); |
597 | } | 598 | } |
598 | 599 | ||
599 | int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | 600 | int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src, |
601 | struct dma_buf *dst, | ||
600 | s32 dst_kind, u64 dst_byte_offset, | 602 | s32 dst_kind, u64 dst_byte_offset, |
601 | u32 dst_size, struct nvhost_fence *fence, | 603 | u32 dst_size, struct nvhost_fence *fence, |
602 | u32 __flags, struct gk20a_cde_param *params, | 604 | u32 __flags, struct gk20a_cde_param *params, |
@@ -605,7 +607,6 @@ int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | |||
605 | struct gk20a_cde_app *cde_app = &g->cde_app; | 607 | struct gk20a_cde_app *cde_app = &g->cde_app; |
606 | struct gk20a_comptags comptags; | 608 | struct gk20a_comptags comptags; |
607 | struct gk20a_cde_ctx *cde_ctx; | 609 | struct gk20a_cde_ctx *cde_ctx; |
608 | struct dma_buf *src = NULL, *dst = NULL; | ||
609 | u64 dst_vaddr = 0, src_vaddr = 0; | 610 | u64 dst_vaddr = 0, src_vaddr = 0; |
610 | u32 flags; | 611 | u32 flags; |
611 | int err, i; | 612 | int err, i; |
@@ -622,14 +623,7 @@ int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | |||
622 | cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) % | 623 | cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) % |
623 | ARRAY_SIZE(cde_app->cde_ctx); | 624 | ARRAY_SIZE(cde_app->cde_ctx); |
624 | 625 | ||
625 | /* First, get buffer references and map the buffers to local va */ | 626 | /* First, map the buffers to local va */ |
626 | |||
627 | dst = dma_buf_get(dst_fd); | ||
628 | if (IS_ERR(src)) { | ||
629 | dst = NULL; | ||
630 | err = -EINVAL; | ||
631 | goto exit_unlock; | ||
632 | } | ||
633 | 627 | ||
634 | /* ensure that the dst buffer has drvdata */ | 628 | /* ensure that the dst buffer has drvdata */ |
635 | err = gk20a_dmabuf_alloc_drvdata(dst, &g->dev->dev); | 629 | err = gk20a_dmabuf_alloc_drvdata(dst, &g->dev->dev); |
@@ -637,18 +631,13 @@ int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | |||
637 | goto exit_unlock; | 631 | goto exit_unlock; |
638 | 632 | ||
639 | /* map the destination buffer */ | 633 | /* map the destination buffer */ |
634 | get_dma_buf(dst); /* a ref for gk20a_vm_map */ | ||
640 | dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0, | 635 | dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0, |
641 | 0, dst_kind, NULL, true, | 636 | 0, dst_kind, NULL, true, |
642 | gk20a_mem_flag_none, | 637 | gk20a_mem_flag_none, |
643 | 0, 0); | 638 | 0, 0); |
644 | if (!dst_vaddr) { | 639 | if (!dst_vaddr) { |
645 | err = -EINVAL; | 640 | dma_buf_put(dst); |
646 | goto exit_unlock; | ||
647 | } | ||
648 | |||
649 | src = dma_buf_get(src_fd); | ||
650 | if (IS_ERR(src)) { | ||
651 | src = NULL; | ||
652 | err = -EINVAL; | 641 | err = -EINVAL; |
653 | goto exit_unlock; | 642 | goto exit_unlock; |
654 | } | 643 | } |
@@ -659,11 +648,13 @@ int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | |||
659 | goto exit_unlock; | 648 | goto exit_unlock; |
660 | 649 | ||
661 | /* map the source buffer to prevent premature release */ | 650 | /* map the source buffer to prevent premature release */ |
651 | get_dma_buf(src); /* a ref for gk20a_vm_map */ | ||
662 | src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0, | 652 | src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0, |
663 | 0, dst_kind, NULL, true, | 653 | 0, dst_kind, NULL, true, |
664 | gk20a_mem_flag_none, | 654 | gk20a_mem_flag_none, |
665 | 0, 0); | 655 | 0, 0); |
666 | if (!src_vaddr) { | 656 | if (!src_vaddr) { |
657 | dma_buf_put(src); | ||
667 | err = -EINVAL; | 658 | err = -EINVAL; |
668 | goto exit_unlock; | 659 | goto exit_unlock; |
669 | } | 660 | } |
@@ -765,12 +756,6 @@ exit_unlock: | |||
765 | if (src_vaddr) | 756 | if (src_vaddr) |
766 | gk20a_vm_unmap(g->cde_app.vm, src_vaddr); | 757 | gk20a_vm_unmap(g->cde_app.vm, src_vaddr); |
767 | 758 | ||
768 | /* drop dmabuf refs if work was aborted */ | ||
769 | if (err && src) | ||
770 | dma_buf_put(src); | ||
771 | if (err && dst) | ||
772 | dma_buf_put(dst); | ||
773 | |||
774 | mutex_unlock(&cde_app->mutex); | 759 | mutex_unlock(&cde_app->mutex); |
775 | 760 | ||
776 | return err; | 761 | return err; |
@@ -922,3 +907,307 @@ err_init_instance: | |||
922 | } | 907 | } |
923 | return ret; | 908 | return ret; |
924 | } | 909 | } |
910 | |||
/*
 * Positions of the patchable CDE launch parameters, in declaration order.
 * The final enumerator, NUM_CDE_LAUNCH_PATCHES, is the number of entries.
 * Entries marked "kernel fills" are not supplied by the caller.
 */
enum cde_launch_patch_offset {
	PATCH_USER_CONST_XTILES = 0,		/* dst buffer width in roptiles */
	PATCH_USER_CONST_YTILES,		/* dst buffer height in roptiles */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2,	/* dst buffer log2(block height) */
	PATCH_USER_CONST_DSTPITCH,		/* dst buffer pitch in bytes */
	PATCH_USER_CONST_DSTOFFSET,		/* dst buffer write offset */
	/*
	 * comp cache index of the first page of the surface;
	 * the kernel looks it up from the PTE
	 */
	PATCH_USER_CONST_FIRSTPAGEOFFSET,
	PATCH_USER_CONST_SURFADDR,		/* gmmu translated surface address, kernel fills */
	PATCH_VPC_DSTIMAGE_ADDR,		/* dst buffer address >> 8, kernel fills */
	PATCH_VPC_DSTIMAGE_ADDR2,		/* dst buffer address >> 8, kernel fills */
	PATCH_VPC_DSTIMAGE_SIZE_MINUS_ONE,	/* dst buffer size - 1, kernel fills */
	PATCH_VPC_DSTIMAGE_SIZE_MINUS_ONE2,	/* dst buffer size - 1, kernel fills */
	PATCH_VPC_DSTIMAGE_SIZE,		/* dst buffer size, kernel fills */
	PATCH_VPC_CURRENT_GRID_SIZE_X,		/* dst width in roptiles / work group width */
	PATCH_VPC_CURRENT_GRID_SIZE_Y,		/* dst height in roptiles / work group height */
	PATCH_VPC_CURRENT_GRID_SIZE_Z,		/* always 1 */
	PATCH_VPC_CURRENT_GROUP_SIZE_X,		/* work group width, 16 seems to be quite optimal */
	PATCH_VPC_CURRENT_GROUP_SIZE_Y,		/* work group height, 8 seems to be quite optimal */
	PATCH_VPC_CURRENT_GROUP_SIZE_Z,		/* always 1 */
	PATCH_QMD_CTA_RASTER_WIDTH,		/* = PATCH_VPC_CURRENT_GRID_SIZE_X */
	PATCH_QMD_CTA_RASTER_HEIGHT,		/* = PATCH_VPC_CURRENT_GRID_SIZE_Y */
	PATCH_QMD_CTA_RASTER_DEPTH,		/* = PATCH_VPC_CURRENT_GRID_SIZE_Z */
	PATCH_QMD_CTA_THREAD_DIMENSION0,	/* = PATCH_VPC_CURRENT_GROUP_SIZE_X */
	PATCH_QMD_CTA_THREAD_DIMENSION1,	/* = PATCH_VPC_CURRENT_GROUP_SIZE_Y */
	PATCH_QMD_CTA_THREAD_DIMENSION2,	/* = PATCH_VPC_CURRENT_GROUP_SIZE_Z */

	NUM_CDE_LAUNCH_PATCHES
};
964 | |||
/*
 * Numeric identifiers attached to launch parameters. Every value is
 * spelled out explicitly; the enumerators are grouped by parameter family.
 */
enum cde_launch_patch_id {
	/* user constants */
	PATCH_USER_CONST_XTILES_ID		= 1030,
	PATCH_USER_CONST_YTILES_ID		= 1031,
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID	= 1032,
	PATCH_USER_CONST_DSTPITCH_ID		= 1033,
	PATCH_USER_CONST_DSTOFFSET_ID		= 1034,

	/* VPC grid and work group dimensions */
	PATCH_VPC_CURRENT_GRID_SIZE_X_ID	= 1035,
	PATCH_VPC_CURRENT_GRID_SIZE_Y_ID	= 1036,
	PATCH_VPC_CURRENT_GRID_SIZE_Z_ID	= 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID	= 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID	= 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID	= 1040,

	/* QMD raster and thread dimensions */
	PATCH_QMD_CTA_RASTER_WIDTH_ID		= 1024,
	PATCH_QMD_CTA_RASTER_HEIGHT_ID		= 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID		= 1026,
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID	= 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID	= 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID	= 1029,
};
984 | |||
985 | static int gk20a_buffer_convert_gpu_to_cde( | ||
986 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, | ||
987 | u64 offset, u64 compbits_offset, | ||
988 | u32 width, u32 height, u32 block_height_log2, | ||
989 | u32 submit_flags, struct nvhost_fence *fence_in, | ||
990 | struct gk20a_fence **fence_out) | ||
991 | { | ||
992 | struct gk20a_cde_param params[NUM_CDE_LAUNCH_PATCHES]; | ||
993 | int param = 0; | ||
994 | int err = 0; | ||
995 | |||
996 | /* Compute per launch parameters */ | ||
997 | const bool transpose = (consumer == NVHOST_GPU_COMPBITS_CDEV); | ||
998 | const int transposed_width = transpose ? height : width; | ||
999 | const int transposed_height = transpose ? width : height; | ||
1000 | const int xtiles = (transposed_width + 7) >> 3; | ||
1001 | const int ytiles = (transposed_height + 7) >> 3; | ||
1002 | const int wgx = 16; | ||
1003 | const int wgy = 8; | ||
1004 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
1005 | const int dst_stride = 128; /* TODO chip constant */ | ||
1006 | const int xalign = compbits_per_byte * wgx; | ||
1007 | const int yalign = wgy; | ||
1008 | const int tilepitch = roundup(xtiles, xalign) / compbits_per_byte; | ||
1009 | const int ytilesaligned = roundup(ytiles, yalign); | ||
1010 | const int gridw = roundup(tilepitch, wgx) / wgx; | ||
1011 | const int gridh = roundup(ytilesaligned, wgy) / wgy; | ||
1012 | |||
1013 | if (xtiles > 4096 / 8 || ytiles > 4096 / 8) { | ||
1014 | gk20a_warn(&g->dev->dev, "cde: too large surface"); | ||
1015 | return -EINVAL; | ||
1016 | } | ||
1017 | |||
1018 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_offset=0x%llx", | ||
1019 | width, height, block_height_log2, compbits_offset); | ||
1020 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d) invocations (%d, %d)", | ||
1021 | width, height, xtiles, ytiles, tilepitch, ytilesaligned); | ||
1022 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) grid (%d, %d)", | ||
1023 | wgx, wgy, gridw, gridh); | ||
1024 | |||
1025 | if (tilepitch % wgx != 0 || ytilesaligned % wgy != 0) { | ||
1026 | gk20a_warn(&g->dev->dev, | ||
1027 | "grid size (%d, %d) is not a multiple of work group size (%d, %d)", | ||
1028 | tilepitch, ytilesaligned, wgx, wgy); | ||
1029 | return -EINVAL; | ||
1030 | } | ||
1031 | |||
1032 | /* Write parameters */ | ||
1033 | #define WRITE_PATCH(NAME, VALUE) \ | ||
1034 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
1035 | WRITE_PATCH(PATCH_USER_CONST_XTILES, xtiles); | ||
1036 | WRITE_PATCH(PATCH_USER_CONST_YTILES, ytiles); | ||
1037 | WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, block_height_log2); | ||
1038 | WRITE_PATCH(PATCH_USER_CONST_DSTPITCH, dst_stride); | ||
1039 | WRITE_PATCH(PATCH_USER_CONST_DSTOFFSET, transpose ? 4 : 0); /* flag */ | ||
1040 | WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_X, gridw); | ||
1041 | WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_Y, gridh); | ||
1042 | WRITE_PATCH(PATCH_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1043 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); | ||
1044 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); | ||
1045 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); | ||
1046 | WRITE_PATCH(PATCH_QMD_CTA_RASTER_WIDTH, gridw); | ||
1047 | WRITE_PATCH(PATCH_QMD_CTA_RASTER_HEIGHT, gridh); | ||
1048 | WRITE_PATCH(PATCH_QMD_CTA_RASTER_DEPTH, 1); | ||
1049 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); | ||
1050 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); | ||
1051 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION2, 1); | ||
1052 | #undef WRITE_PATCH | ||
1053 | |||
1054 | gk20a_busy(g->dev); | ||
1055 | err = gk20a_init_cde_support(g); | ||
1056 | if (err) | ||
1057 | goto out; | ||
1058 | err = gk20a_cde_convert(g, dmabuf, dmabuf, | ||
1059 | 0, /* dst kind */ | ||
1060 | compbits_offset, | ||
1061 | 0, /* dst_size, 0 = auto */ | ||
1062 | fence_in, submit_flags, | ||
1063 | params, param, fence_out); | ||
1064 | out: | ||
1065 | gk20a_idle(g->dev); | ||
1066 | return err; | ||
1067 | } | ||
1068 | |||
1069 | int gk20a_prepare_compressible_read( | ||
1070 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | ||
1071 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1072 | u32 width, u32 height, u32 block_height_log2, | ||
1073 | u32 submit_flags, struct nvhost_fence *fence, | ||
1074 | u32 *valid_compbits, struct gk20a_fence **fence_out) | ||
1075 | { | ||
1076 | int err = 0; | ||
1077 | struct gk20a_buffer_state *state; | ||
1078 | struct dma_buf *dmabuf; | ||
1079 | u32 missing_bits; | ||
1080 | |||
1081 | if (!g->cde_app.initialised) { | ||
1082 | err = gk20a_cde_reload(g); | ||
1083 | if (err) | ||
1084 | return err; | ||
1085 | } | ||
1086 | |||
1087 | dmabuf = dma_buf_get(buffer_fd); | ||
1088 | if (IS_ERR(dmabuf)) | ||
1089 | return -EINVAL; | ||
1090 | |||
1091 | err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state); | ||
1092 | if (err) { | ||
1093 | dma_buf_put(dmabuf); | ||
1094 | return err; | ||
1095 | } | ||
1096 | |||
1097 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1098 | |||
1099 | mutex_lock(&state->lock); | ||
1100 | |||
1101 | if (state->valid_compbits && request == NVHOST_GPU_COMPBITS_NONE) { | ||
1102 | |||
1103 | gk20a_fence_put(state->fence); | ||
1104 | state->fence = NULL; | ||
1105 | /* state->fence = decompress(); | ||
1106 | state->valid_compbits = 0; */ | ||
1107 | err = -EINVAL; | ||
1108 | goto out; | ||
1109 | } else if (missing_bits) { | ||
1110 | struct gk20a_fence *new_fence = NULL; | ||
1111 | if ((state->valid_compbits & NVHOST_GPU_COMPBITS_GPU) && | ||
1112 | (missing_bits & NVHOST_GPU_COMPBITS_CDEH)) { | ||
1113 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1114 | g, dmabuf, | ||
1115 | NVHOST_GPU_COMPBITS_CDEH, | ||
1116 | offset, compbits_hoffset, | ||
1117 | width, height, block_height_log2, | ||
1118 | submit_flags, fence, | ||
1119 | &new_fence); | ||
1120 | if (err) | ||
1121 | goto out; | ||
1122 | |||
1123 | /* CDEH bits generated, update state & fence */ | ||
1124 | gk20a_fence_put(state->fence); | ||
1125 | state->fence = new_fence; | ||
1126 | state->valid_compbits |= NVHOST_GPU_COMPBITS_CDEH; | ||
1127 | } | ||
1128 | if ((state->valid_compbits & NVHOST_GPU_COMPBITS_GPU) && | ||
1129 | (missing_bits & NVHOST_GPU_COMPBITS_CDEV)) { | ||
1130 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1131 | g, dmabuf, | ||
1132 | NVHOST_GPU_COMPBITS_CDEV, | ||
1133 | offset, compbits_voffset, | ||
1134 | width, height, block_height_log2, | ||
1135 | submit_flags, fence, | ||
1136 | &new_fence); | ||
1137 | if (err) | ||
1138 | goto out; | ||
1139 | |||
1140 | /* CDEH bits generated, update state & fence */ | ||
1141 | gk20a_fence_put(state->fence); | ||
1142 | state->fence = new_fence; | ||
1143 | state->valid_compbits |= NVHOST_GPU_COMPBITS_CDEV; | ||
1144 | } | ||
1145 | } | ||
1146 | |||
1147 | if (state->fence && fence_out) | ||
1148 | *fence_out = gk20a_fence_get(state->fence); | ||
1149 | |||
1150 | if (valid_compbits) | ||
1151 | *valid_compbits = state->valid_compbits; | ||
1152 | |||
1153 | out: | ||
1154 | mutex_unlock(&state->lock); | ||
1155 | dma_buf_put(dmabuf); | ||
1156 | return 0; | ||
1157 | } | ||
1158 | |||
1159 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1160 | u32 valid_compbits, u64 offset) | ||
1161 | { | ||
1162 | int err; | ||
1163 | struct gk20a_buffer_state *state; | ||
1164 | struct dma_buf *dmabuf; | ||
1165 | |||
1166 | dmabuf = dma_buf_get(buffer_fd); | ||
1167 | if (IS_ERR(dmabuf)) { | ||
1168 | dev_err(dev_from_gk20a(g), "invalid dmabuf"); | ||
1169 | return -EINVAL; | ||
1170 | } | ||
1171 | |||
1172 | err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state); | ||
1173 | if (err) { | ||
1174 | dev_err(dev_from_gk20a(g), "could not get state from dmabuf"); | ||
1175 | dma_buf_put(dmabuf); | ||
1176 | return err; | ||
1177 | } | ||
1178 | |||
1179 | mutex_lock(&state->lock); | ||
1180 | |||
1181 | /* Update the compbits state. */ | ||
1182 | state->valid_compbits = valid_compbits; | ||
1183 | |||
1184 | /* Discard previous compbit job fence. */ | ||
1185 | gk20a_fence_put(state->fence); | ||
1186 | state->fence = NULL; | ||
1187 | |||
1188 | mutex_unlock(&state->lock); | ||
1189 | dma_buf_put(dmabuf); | ||
1190 | return 0; | ||
1191 | } | ||
1192 | |||
1193 | static ssize_t gk20a_cde_reload_write(struct file *file, | ||
1194 | const char __user *userbuf, size_t count, loff_t *ppos) | ||
1195 | { | ||
1196 | struct gk20a *g = file->private_data; | ||
1197 | gk20a_cde_reload(g); | ||
1198 | return count; | ||
1199 | } | ||
1200 | |||
1201 | static const struct file_operations gk20a_cde_reload_fops = { | ||
1202 | .open = simple_open, | ||
1203 | .write = gk20a_cde_reload_write, | ||
1204 | }; | ||
1205 | |||
1206 | void gk20a_cde_debugfs_init(struct platform_device *dev) | ||
1207 | { | ||
1208 | struct gk20a_platform *platform = platform_get_drvdata(dev); | ||
1209 | struct gk20a *g = get_gk20a(dev); | ||
1210 | |||
1211 | debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs, | ||
1212 | g, &gk20a_cde_reload_fops); | ||
1213 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h index 784ae8b4..3782b44b 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h | |||
@@ -245,10 +245,20 @@ struct gk20a_cde_app { | |||
245 | int gk20a_cde_destroy(struct gk20a *g); | 245 | int gk20a_cde_destroy(struct gk20a *g); |
246 | int gk20a_init_cde_support(struct gk20a *g); | 246 | int gk20a_init_cde_support(struct gk20a *g); |
247 | int gk20a_cde_reload(struct gk20a *g); | 247 | int gk20a_cde_reload(struct gk20a *g); |
248 | int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | 248 | int gk20a_cde_convert(struct gk20a *g, struct dma_buf *src, struct dma_buf *dst, |
249 | s32 dst_kind, u64 dst_word_offset, | 249 | s32 dst_kind, u64 dst_word_offset, |
250 | u32 dst_size, struct nvhost_fence *fence, | 250 | u32 dst_size, struct nvhost_fence *fence, |
251 | u32 __flags, struct gk20a_cde_param *params, | 251 | u32 __flags, struct gk20a_cde_param *params, |
252 | int num_params, struct gk20a_fence **fence_out); | 252 | int num_params, struct gk20a_fence **fence_out); |
253 | void gk20a_cde_debugfs_init(struct platform_device *dev); | ||
254 | |||
255 | int gk20a_prepare_compressible_read( | ||
256 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | ||
257 | u64 compbits_hoffset, u64 compbits_voffset, | ||
258 | u32 width, u32 height, u32 block_height_log2, | ||
259 | u32 submit_flags, struct nvhost_fence *fence, | ||
260 | u32 *valid_compbits, struct gk20a_fence **fence_out); | ||
261 | int gk20a_mark_compressible_write( | ||
262 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset); | ||
253 | 263 | ||
254 | #endif | 264 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 9128959f..e5628c3f 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/nvhost_gpu_ioctl.h> | 21 | #include <linux/nvhost_gpu_ioctl.h> |
22 | 22 | ||
23 | #include "gk20a.h" | 23 | #include "gk20a.h" |
24 | #include "fence_gk20a.h" | ||
24 | 25 | ||
25 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | 26 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) |
26 | { | 27 | { |
@@ -78,6 +79,72 @@ gk20a_ctrl_ioctl_gpu_characteristics( | |||
78 | return err; | 79 | return err; |
79 | } | 80 | } |
80 | 81 | ||
82 | static int gk20a_ctrl_prepare_compressible_read( | ||
83 | struct gk20a *g, | ||
84 | struct nvhost_gpu_prepare_compressible_read_args *args) | ||
85 | { | ||
86 | struct nvhost_fence fence; | ||
87 | struct gk20a_fence *fence_out = NULL; | ||
88 | int ret = 0; | ||
89 | int flags = args->submit_flags; | ||
90 | |||
91 | fence.syncpt_id = args->fence.syncpt_id; | ||
92 | fence.value = args->fence.syncpt_value; | ||
93 | |||
94 | gk20a_busy(g->dev); | ||
95 | ret = gk20a_prepare_compressible_read(g, args->handle, | ||
96 | args->request_compbits, args->offset, | ||
97 | args->compbits_hoffset, args->compbits_voffset, | ||
98 | args->width, args->height, args->block_height_log2, | ||
99 | flags, &fence, &args->valid_compbits, | ||
100 | &fence_out); | ||
101 | gk20a_idle(g->dev); | ||
102 | |||
103 | if (ret) | ||
104 | return ret; | ||
105 | |||
106 | /* Convert fence_out to something we can pass back to user space. */ | ||
107 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { | ||
108 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | ||
109 | if (fence_out) { | ||
110 | int fd = gk20a_fence_install_fd(fence_out); | ||
111 | if (fd < 0) | ||
112 | ret = fd; | ||
113 | else | ||
114 | args->fence.fd = fd; | ||
115 | } else { | ||
116 | args->fence.fd = -1; | ||
117 | } | ||
118 | } else { | ||
119 | if (fence_out) { | ||
120 | args->fence.syncpt_id = fence_out->syncpt_id; | ||
121 | args->fence.syncpt_value = | ||
122 | fence_out->syncpt_value; | ||
123 | } else { | ||
124 | args->fence.syncpt_id = -1; | ||
125 | args->fence.syncpt_value = 0; | ||
126 | } | ||
127 | } | ||
128 | } | ||
129 | gk20a_fence_put(fence_out); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int gk20a_ctrl_mark_compressible_write( | ||
135 | struct gk20a *g, | ||
136 | struct nvhost_gpu_mark_compressible_write_args *args) | ||
137 | { | ||
138 | int ret = 0; | ||
139 | |||
140 | gk20a_busy(g->dev); | ||
141 | ret = gk20a_mark_compressible_write(g, args->handle, | ||
142 | args->valid_compbits, args->offset); | ||
143 | gk20a_idle(g->dev); | ||
144 | |||
145 | return ret; | ||
146 | } | ||
147 | |||
81 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 148 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
82 | { | 149 | { |
83 | struct platform_device *dev = filp->private_data; | 150 | struct platform_device *dev = filp->private_data; |
@@ -225,7 +292,14 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
225 | err = gk20a_ctrl_ioctl_gpu_characteristics( | 292 | err = gk20a_ctrl_ioctl_gpu_characteristics( |
226 | g, (struct nvhost_gpu_get_characteristics *)buf); | 293 | g, (struct nvhost_gpu_get_characteristics *)buf); |
227 | break; | 294 | break; |
228 | 295 | case NVHOST_GPU_IOCTL_PREPARE_COMPRESSIBLE_READ: | |
296 | err = gk20a_ctrl_prepare_compressible_read(g, | ||
297 | (struct nvhost_gpu_prepare_compressible_read_args *)buf); | ||
298 | break; | ||
299 | case NVHOST_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE: | ||
300 | err = gk20a_ctrl_mark_compressible_write(g, | ||
301 | (struct nvhost_gpu_mark_compressible_write_args *)buf); | ||
302 | break; | ||
229 | default: | 303 | default: |
230 | gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); | 304 | gk20a_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); |
231 | err = -ENOTTY; | 305 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2975798f..4e3beb7c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -1523,6 +1523,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1523 | platform->debugfs, | 1523 | platform->debugfs, |
1524 | &gk20a->timeouts_enabled); | 1524 | &gk20a->timeouts_enabled); |
1525 | gk20a_pmu_debugfs_init(dev); | 1525 | gk20a_pmu_debugfs_init(dev); |
1526 | gk20a_cde_debugfs_init(dev); | ||
1526 | #endif | 1527 | #endif |
1527 | 1528 | ||
1528 | #ifdef CONFIG_INPUT_CFBOOST | 1529 | #ifdef CONFIG_INPUT_CFBOOST |