-rw-r--r--  drivers/gpu/nvgpu/common/linux/comptags.c  |  25
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dmabuf.c    |   4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c        |   3
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c           | 136
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/comptags.h |  10
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/vm.h       |  17
6 files changed, 118 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c
index f55989f7..92e8aa3e 100644
--- a/drivers/gpu/nvgpu/common/linux/comptags.c
+++ b/drivers/gpu/nvgpu/common/linux/comptags.c
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g,
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
 							     buf->dev);
-	u32 ctaglines_allocsize;
 	u32 offset;
 	int err;
 
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g,
 	if (!lines)
 		return -EINVAL;
 
-	ctaglines_allocsize = lines;
-
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	err = gk20a_comptaglines_alloc(allocator, &offset,
-				       ctaglines_allocsize);
-	if (err)
-		return err;
+	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
+	if (!err) {
+		priv->comptags.offset = offset;
+		priv->comptags.lines = lines;
+	} else {
+		priv->comptags.offset = 0;
+		priv->comptags.lines = 0;
+	}
 
-	priv->comptags.offset = offset;
-	priv->comptags.lines = lines;
-	priv->comptags.allocated_lines = ctaglines_allocsize;
+	/*
+	 * We don't report an error here if comptag alloc failed. The
+	 * caller will simply fall back to incompressible kinds. It
+	 * would not be safe to re-allocate comptags anyway on
+	 * successive calls, as that would break map aliasing.
+	 */
+	priv->comptags.allocated = true;
 
 	return 0;
 }
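Note: the rewritten gk20a_alloc_comptags() above attempts the line allocation at most once per buffer. It records the attempt in priv->comptags.allocated even when gk20a_comptaglines_alloc() fails, and returns success either way so the caller can fall back to incompressible kinds. The following is a minimal standalone C model of that state machine; struct comptags, alloc_lines() and comptags_alloc() are simplified stand-ins for the driver's types and functions, not nvgpu code.

#include <stdbool.h>
#include <stdio.h>

struct comptags {
	unsigned int offset;
	unsigned int lines;
	bool allocated;		/* allocation attempted (once per buffer) */
};

/* Hypothetical stand-in for gk20a_comptaglines_alloc(): a bump allocator
 * over a 64-line pool that fails once the pool is exhausted. */
static int alloc_lines(unsigned int *offset, unsigned int lines)
{
	static unsigned int next;

	if (next + lines > 64)
		return -1;
	*offset = next;
	next += lines;
	return 0;
}

/* Mirrors the new gk20a_alloc_comptags() shape: record the attempt even
 * on allocation failure and return success; the caller checks 'lines'. */
static int comptags_alloc(struct comptags *ct, unsigned int lines)
{
	unsigned int offset;

	if (!alloc_lines(&offset, lines)) {
		ct->offset = offset;
		ct->lines = lines;
	} else {
		ct->offset = 0;
		ct->lines = 0;	/* caller falls back to incompressible kind */
	}
	ct->allocated = true;	/* never retried for this buffer */
	return 0;
}

int main(void)
{
	struct comptags ct = { 0 };

	if (!ct.allocated)
		comptags_alloc(&ct, 32);
	printf("allocated=%d lines=%u offset=%u\n",
	       ct.allocated, ct.lines, ct.offset);
	return 0;
}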
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c
index 6b44ff55..08cf5f2b 100644
--- a/drivers/gpu/nvgpu/common/linux/dmabuf.c
+++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv)
 
 	g = priv->g;
 
-	if (priv->comptags.lines) {
+	if (priv->comptags.allocated && priv->comptags.lines) {
 		BUG_ON(!priv->comptag_allocator);
 		gk20a_comptaglines_free(priv->comptag_allocator,
 					priv->comptags.offset,
-					priv->comptags.lines);
+					priv->comptags.lines);
 	}
 
 	/* Free buffer states */
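Note: the free path above is the other half of the contract: 'allocated' says an attempt was made, 'lines' says it succeeded, and only the combination frees anything. A sketch extending the model above, with free_lines() as a hypothetical stand-in for gk20a_comptaglines_free():

/* Hypothetical stand-in for gk20a_comptaglines_free(). */
static void free_lines(unsigned int offset, unsigned int lines)
{
	(void)offset;
	(void)lines;	/* a real allocator would return the range here */
}

/* Mirrors the new guard in gk20a_mm_delete_priv(): only a buffer that
 * both attempted allocation and actually received lines frees anything. */
static void comptags_free(struct comptags *ct)
{
	if (ct->allocated && ct->lines)
		free_lines(ct->offset, ct->lines);
}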
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index d6d86c94..8e464627 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 	nvgpu_log(g, gpu_dbg_map,
 		  "gv: 0x%04x_%08x + 0x%-7zu "
 		  "[dma: 0x%010llx, pa: 0x%010llx] "
-		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
+		  "pgsz=%-3dKb as=%-2d "
 		  "flags=0x%x apt=%s (reused)",
 		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
 		  os_buf->dmabuf->size,
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 		  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
-		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
 		  mapped_buffer->flags,
 		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
 
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index cfac4f8e..be7e4207 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -39,6 +39,20 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+struct nvgpu_ctag_buffer_info {
+	u64 size;
+	enum gmmu_pgsz_gk20a pgsz_idx;
+	u32 flags;
+
+	s16 compr_kind;
+	s16 incompr_kind;
+
+	u32 ctag_lines;
+};
+
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+					struct nvgpu_ctag_buffer_info *binfo);
+
 static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
 			     struct vm_gk20a_mapping_batch *batch);
 
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct nvgpu_ctag_buffer_info binfo = { 0 };
-	struct gk20a_comptags comptags;
 	struct nvgpu_vm_area *vm_area = NULL;
 	int err = 0;
 	u64 align;
-	u32 ctag_offset;
+	u32 ctag_offset = 0;
 	bool clear_ctags = false;
 	bool va_allocated = true;
 
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 	 */
 	s16 map_key_kind;
 
+	/*
+	 * The actual GMMU PTE kind
+	 */
+	u8 pte_kind;
+
 	if (vm->userspace_managed &&
 	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
 		nvgpu_err(g,
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!vm->enable_ctag)
 		binfo.ctag_lines = 0;
 
-	gk20a_get_comptags(os_buf, &comptags);
+	if (binfo.ctag_lines) {
+		struct gk20a_comptags comptags = { 0 };
 
-	if (binfo.ctag_lines && !comptags.lines) {
 		/*
-		 * Allocate compression resources if needed.
+		 * Get the comptags state
 		 */
-		if (gk20a_alloc_comptags(g,
-					 os_buf,
-					 &g->gr.comp_tags,
-					 binfo.ctag_lines)) {
-
-			/*
-			 * Prevent compression...
-			 */
-			binfo.compr_kind = NV_KIND_INVALID;
-
-			/*
-			 * ... And make sure we have a fallback.
-			 */
-			if (binfo.incompr_kind == NV_KIND_INVALID) {
-				nvgpu_err(g, "comptag alloc failed and no "
-					  "fallback kind specified");
-				err = -ENOMEM;
+		gk20a_get_comptags(os_buf, &comptags);
 
+		/*
+		 * Allocate if not yet allocated
+		 */
+		if (!comptags.allocated) {
+			err = gk20a_alloc_comptags(g, os_buf,
+						   &g->gr.comp_tags,
+						   binfo.ctag_lines);
+			if (err) {
 				/*
-				 * Any alloced comptags are cleaned up when the
-				 * dmabuf is freed.
+				 * This is an irrecoverable failure and we need
+				 * to abort. In particular, it is not safe to
+				 * proceed with incompressible fallback, since
+				 * we could not mark our alloc failure
+				 * anywhere. Later we would retry allocation and
+				 * break compressible map aliasing.
 				 */
+				nvgpu_err(g,
+					  "Error %d setting up comptags", err);
 				goto clean_up;
 			}
-		} else {
+
+			/*
+			 * Refresh comptags state after alloc. Field
+			 * comptags.lines will be 0 if alloc failed.
+			 */
 			gk20a_get_comptags(os_buf, &comptags);
 
-			if (g->ops.ltc.cbc_ctrl)
-				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-					comptags.offset,
-					comptags.offset +
-					comptags.allocated_lines - 1);
-			else
-				clear_ctags = true;
+			/*
+			 * Newly allocated comptags need to be cleared
+			 */
+			if (comptags.lines) {
+				if (g->ops.ltc.cbc_ctrl)
+					g->ops.ltc.cbc_ctrl(
+						g, gk20a_cbc_op_clear,
+						comptags.offset,
+						(comptags.offset +
+						 comptags.lines - 1));
+				else
+					/*
+					 * The comptags will be cleared as part
+					 * of mapping (vgpu)
+					 */
+					clear_ctags = true;
+			}
 		}
+
+		/*
+		 * Store the ctag offset for later use if we got the comptags
+		 */
+		if (comptags.lines)
+			ctag_offset = comptags.offset;
 	}
 
 	/*
-	 * Calculate comptag index for this mapping. Differs in case of partial
-	 * mapping.
+	 * Figure out the kind and ctag offset for the GMMU page tables
 	 */
-	ctag_offset = comptags.offset;
-	if (ctag_offset)
+	if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
+		/*
+		 * Adjust the ctag_offset as per the buffer map offset
+		 */
 		ctag_offset += phys_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
+		pte_kind = binfo.compr_kind;
+	} else if (binfo.incompr_kind != NV_KIND_INVALID) {
+		/*
+		 * Incompressible kind, ctag offset will not be programmed
+		 */
+		ctag_offset = 0;
+		pte_kind = binfo.incompr_kind;
+	} else {
+		/*
+		 * Caller required compression, but we cannot provide it
+		 */
+		nvgpu_err(g, "No comptags and no incompressible fallback kind");
+		err = -ENOMEM;
+		goto clean_up;
+	}
 
 	map_addr = g->ops.mm.gmmu_map(vm,
 		map_addr,
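Note: the hunk above replaces the old inline ternary with an explicit three-way selection of the PTE kind. A compressible kind is used only when a nonzero ctag_offset shows comptags were actually obtained; otherwise the incompressible fallback is used, and if neither is possible the map fails. A compact sketch of that decision, under the assumption that NV_KIND_INVALID is a negative sentinel (KIND_INVALID here is a stand-in, not the driver's macro):

#include <stdint.h>

#define KIND_INVALID (-1)	/* stand-in for the driver's NV_KIND_INVALID */

/* Sketch of the new kind selection in nvgpu_vm_map(): compression wins
 * only when comptags exist; otherwise fall back, or fail outright. */
static int choose_pte_kind(int16_t compr_kind, int16_t incompr_kind,
			   uint32_t ctag_offset, uint8_t *pte_kind,
			   uint32_t *pte_ctag_offset)
{
	if (compr_kind != KIND_INVALID && ctag_offset) {
		*pte_kind = (uint8_t)compr_kind;
		*pte_ctag_offset = ctag_offset;	/* map-offset adjusted */
	} else if (incompr_kind != KIND_INVALID) {
		*pte_kind = (uint8_t)incompr_kind;
		*pte_ctag_offset = 0;	/* comptags never referenced */
	} else {
		return -1;	/* -ENOMEM: compression required, unavailable */
	}
	return 0;
}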
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 		phys_offset,
 		map_size,
 		binfo.pgsz_idx,
-		binfo.compr_kind != NV_KIND_INVALID ?
-		binfo.compr_kind : binfo.incompr_kind,
+		pte_kind,
 		ctag_offset,
 		flags,
 		rw,
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->addr = map_addr;
 	mapped_buffer->size = map_size;
 	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
-	mapped_buffer->ctag_offset = ctag_offset;
-	mapped_buffer->ctag_lines = binfo.ctag_lines;
-	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = map_key_kind;
@@ -1074,8 +1122,8 @@ done:
 	return;
 }
 
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-				 struct nvgpu_ctag_buffer_info *binfo)
+static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
+					struct nvgpu_ctag_buffer_info *binfo)
 {
 	bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
 	struct gk20a *g = gk20a_from_vm(vm);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
index 5482d0ce..2d9f034a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h
@@ -25,8 +25,14 @@ struct nvgpu_os_buffer;
 struct gk20a_comptags {
 	u32 offset;
 	u32 lines;
-	u32 allocated_lines;
-	bool user_mappable;
+
+	/*
+	 * This signals whether allocation has been attempted. Observe
+	 * 'lines' to see whether comptags were actually allocated. We
+	 * try allocation only once per buffer in order not to break
+	 * multiple compressible-kind mappings.
+	 */
+	bool allocated;
 };
 
 struct gk20a_comptag_allocator {
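Note: the comment on 'allocated' is the heart of the patch: retrying would break aliasing. Concretely, if a buffer is mapped twice and the first allocation fails, the first mapping gets incompressible PTEs; a successful retry on the second map would hand the same memory compressible PTEs. Reusing the model sketched after the comptags.c hunk:

/* Two maps of the same buffer, continuing the earlier model: the second
 * map observes allocated == true and must not retry, so both mappings
 * agree on (in)compressibility. */
struct comptags ct = { 0 };

comptags_alloc(&ct, 32);	/* map #1: sets allocated even on failure */
if (!ct.allocated)		/* map #2: skipped -> no retry, no aliasing */
	comptags_alloc(&ct, 32);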
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index abb297ab..c0a4124c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -110,9 +110,6 @@ struct nvgpu_mapped_buf {
 	u64 size;
 
 	u32 pgsz_idx;
-	u32 ctag_offset;
-	u32 ctag_lines;
-	u32 ctag_allocated_lines;
 
 	u32 flags;
 	u32 kind;
@@ -143,17 +140,6 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
 	((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node));
 }
 
-struct nvgpu_ctag_buffer_info {
-	u64 size;
-	enum gmmu_pgsz_gk20a pgsz_idx;
-	u32 flags;
-
-	s16 compr_kind;
-	s16 incompr_kind;
-
-	u32 ctag_lines;
-};
-
 struct vm_gk20a {
 	struct mm_gk20a *mm;
 	struct gk20a_as_share *as_share; /* as_share this represents */
157struct vm_gk20a { 143struct vm_gk20a {
158 struct mm_gk20a *mm; 144 struct mm_gk20a *mm;
159 struct gk20a_as_share *as_share; /* as_share this represents */ 145 struct gk20a_as_share *as_share; /* as_share this represents */
@@ -221,9 +207,6 @@ void nvgpu_vm_put(struct vm_gk20a *vm);
 int vm_aspace_id(struct vm_gk20a *vm);
 int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
 
-int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
-				 struct nvgpu_ctag_buffer_info *binfo);
-
 /* batching eliminates redundant cache flushes and invalidates */
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
 void nvgpu_vm_mapping_batch_finish(