diff options
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/comptags.c | 25 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dmabuf.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vm.c | 136 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/comptags.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/vm.h | 17 |
6 files changed, 118 insertions, 77 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c index f55989f7..92e8aa3e 100644 --- a/drivers/gpu/nvgpu/common/linux/comptags.c +++ b/drivers/gpu/nvgpu/common/linux/comptags.c | |||
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g, | |||
46 | { | 46 | { |
47 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | 47 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, |
48 | buf->dev); | 48 | buf->dev); |
49 | u32 ctaglines_allocsize; | ||
50 | u32 offset; | 49 | u32 offset; |
51 | int err; | 50 | int err; |
52 | 51 | ||
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g, | |||
56 | if (!lines) | 55 | if (!lines) |
57 | return -EINVAL; | 56 | return -EINVAL; |
58 | 57 | ||
59 | ctaglines_allocsize = lines; | ||
60 | |||
61 | /* store the allocator so we can use it when we free the ctags */ | 58 | /* store the allocator so we can use it when we free the ctags */ |
62 | priv->comptag_allocator = allocator; | 59 | priv->comptag_allocator = allocator; |
63 | err = gk20a_comptaglines_alloc(allocator, &offset, | 60 | err = gk20a_comptaglines_alloc(allocator, &offset, lines); |
64 | ctaglines_allocsize); | 61 | if (!err) { |
65 | if (err) | 62 | priv->comptags.offset = offset; |
66 | return err; | 63 | priv->comptags.lines = lines; |
64 | } else { | ||
65 | priv->comptags.offset = 0; | ||
66 | priv->comptags.lines = 0; | ||
67 | } | ||
67 | 68 | ||
68 | priv->comptags.offset = offset; | 69 | /* |
69 | priv->comptags.lines = lines; | 70 | * We don't report an error here if comptag alloc failed. The |
70 | priv->comptags.allocated_lines = ctaglines_allocsize; | 71 | * caller will simply fall back to incompressible kinds. It |
72 | * would not be safe to re-allocate comptags anyways on | ||
73 | * successive calls, as that would break map aliasing. | ||
74 | */ | ||
75 | priv->comptags.allocated = true; | ||
71 | 76 | ||
72 | return 0; | 77 | return 0; |
73 | } | 78 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c index 6b44ff55..08cf5f2b 100644 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.c +++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c | |||
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
41 | 41 | ||
42 | g = priv->g; | 42 | g = priv->g; |
43 | 43 | ||
44 | if (priv->comptags.lines) { | 44 | if (priv->comptags.allocated && priv->comptags.lines) { |
45 | BUG_ON(!priv->comptag_allocator); | 45 | BUG_ON(!priv->comptag_allocator); |
46 | gk20a_comptaglines_free(priv->comptag_allocator, | 46 | gk20a_comptaglines_free(priv->comptag_allocator, |
47 | priv->comptags.offset, | 47 | priv->comptags.offset, |
48 | priv->comptags.allocated_lines); | 48 | priv->comptags.lines); |
49 | } | 49 | } |
50 | 50 | ||
51 | /* Free buffer states */ | 51 | /* Free buffer states */ |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index d6d86c94..8e464627 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
132 | nvgpu_log(g, gpu_dbg_map, | 132 | nvgpu_log(g, gpu_dbg_map, |
133 | "gv: 0x%04x_%08x + 0x%-7zu " | 133 | "gv: 0x%04x_%08x + 0x%-7zu " |
134 | "[dma: 0x%010llx, pa: 0x%010llx] " | 134 | "[dma: 0x%010llx, pa: 0x%010llx] " |
135 | "pgsz=%-3dKb as=%-2d ctags=%d start=%d " | 135 | "pgsz=%-3dKb as=%-2d " |
136 | "flags=0x%x apt=%s (reused)", | 136 | "flags=0x%x apt=%s (reused)", |
137 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), | 137 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), |
138 | os_buf->dmabuf->size, | 138 | os_buf->dmabuf->size, |
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
140 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), | 140 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), |
141 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 141 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
142 | vm_aspace_id(vm), | 142 | vm_aspace_id(vm), |
143 | mapped_buffer->ctag_lines, mapped_buffer->ctag_offset, | ||
144 | mapped_buffer->flags, | 143 | mapped_buffer->flags, |
145 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | 144 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); |
146 | 145 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index cfac4f8e..be7e4207 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c | |||
@@ -39,6 +39,20 @@ | |||
39 | #include "gk20a/gk20a.h" | 39 | #include "gk20a/gk20a.h" |
40 | #include "gk20a/mm_gk20a.h" | 40 | #include "gk20a/mm_gk20a.h" |
41 | 41 | ||
42 | struct nvgpu_ctag_buffer_info { | ||
43 | u64 size; | ||
44 | enum gmmu_pgsz_gk20a pgsz_idx; | ||
45 | u32 flags; | ||
46 | |||
47 | s16 compr_kind; | ||
48 | s16 incompr_kind; | ||
49 | |||
50 | u32 ctag_lines; | ||
51 | }; | ||
52 | |||
53 | static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, | ||
54 | struct nvgpu_ctag_buffer_info *binfo); | ||
55 | |||
42 | static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, | 56 | static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, |
43 | struct vm_gk20a_mapping_batch *batch); | 57 | struct vm_gk20a_mapping_batch *batch); |
44 | 58 | ||
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
731 | struct gk20a *g = gk20a_from_vm(vm); | 745 | struct gk20a *g = gk20a_from_vm(vm); |
732 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | 746 | struct nvgpu_mapped_buf *mapped_buffer = NULL; |
733 | struct nvgpu_ctag_buffer_info binfo = { 0 }; | 747 | struct nvgpu_ctag_buffer_info binfo = { 0 }; |
734 | struct gk20a_comptags comptags; | ||
735 | struct nvgpu_vm_area *vm_area = NULL; | 748 | struct nvgpu_vm_area *vm_area = NULL; |
736 | int err = 0; | 749 | int err = 0; |
737 | u64 align; | 750 | u64 align; |
738 | u32 ctag_offset; | 751 | u32 ctag_offset = 0; |
739 | bool clear_ctags = false; | 752 | bool clear_ctags = false; |
740 | bool va_allocated = true; | 753 | bool va_allocated = true; |
741 | 754 | ||
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
746 | */ | 759 | */ |
747 | s16 map_key_kind; | 760 | s16 map_key_kind; |
748 | 761 | ||
762 | /* | ||
763 | * The actual GMMU PTE kind | ||
764 | */ | ||
765 | u8 pte_kind; | ||
766 | |||
749 | if (vm->userspace_managed && | 767 | if (vm->userspace_managed && |
750 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | 768 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { |
751 | nvgpu_err(g, | 769 | nvgpu_err(g, |
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
835 | if (!vm->enable_ctag) | 853 | if (!vm->enable_ctag) |
836 | binfo.ctag_lines = 0; | 854 | binfo.ctag_lines = 0; |
837 | 855 | ||
838 | gk20a_get_comptags(os_buf, &comptags); | 856 | if (binfo.ctag_lines) { |
857 | struct gk20a_comptags comptags = { 0 }; | ||
839 | 858 | ||
840 | if (binfo.ctag_lines && !comptags.lines) { | ||
841 | /* | 859 | /* |
842 | * Allocate compression resources if needed. | 860 | * Get the comptags state |
843 | */ | 861 | */ |
844 | if (gk20a_alloc_comptags(g, | 862 | gk20a_get_comptags(os_buf, &comptags); |
845 | os_buf, | ||
846 | &g->gr.comp_tags, | ||
847 | binfo.ctag_lines)) { | ||
848 | |||
849 | /* | ||
850 | * Prevent compression... | ||
851 | */ | ||
852 | binfo.compr_kind = NV_KIND_INVALID; | ||
853 | |||
854 | /* | ||
855 | * ... And make sure we have a fallback. | ||
856 | */ | ||
857 | if (binfo.incompr_kind == NV_KIND_INVALID) { | ||
858 | nvgpu_err(g, "comptag alloc failed and no " | ||
859 | "fallback kind specified"); | ||
860 | err = -ENOMEM; | ||
861 | 863 | ||
864 | /* | ||
865 | * Allocate if not yet allocated | ||
866 | */ | ||
867 | if (!comptags.allocated) { | ||
868 | err = gk20a_alloc_comptags(g, os_buf, | ||
869 | &g->gr.comp_tags, | ||
870 | binfo.ctag_lines); | ||
871 | if (err) { | ||
862 | /* | 872 | /* |
863 | * Any alloced comptags are cleaned up when the | 873 | * This is an irrecoverable failure and we need |
864 | * dmabuf is freed. | 874 | * to abort. In particular, it is not safe to |
875 | * proceed with incompressible fallback, since | ||
876 | * we could not mark our alloc failure | ||
877 | * anywhere. Later we would retry allocation and | ||
878 | * break compressible map aliasing. | ||
865 | */ | 879 | */ |
880 | nvgpu_err(g, | ||
881 | "Error %d setting up comptags", err); | ||
866 | goto clean_up; | 882 | goto clean_up; |
867 | } | 883 | } |
868 | } else { | 884 | |
885 | /* | ||
886 | * Refresh comptags state after alloc. Field | ||
887 | * comptags.lines will be 0 if alloc failed. | ||
888 | */ | ||
869 | gk20a_get_comptags(os_buf, &comptags); | 889 | gk20a_get_comptags(os_buf, &comptags); |
870 | 890 | ||
871 | if (g->ops.ltc.cbc_ctrl) | 891 | /* |
872 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | 892 | * Newly allocated comptags needs to be cleared |
873 | comptags.offset, | 893 | */ |
874 | comptags.offset + | 894 | if (comptags.lines) { |
875 | comptags.allocated_lines - 1); | 895 | if (g->ops.ltc.cbc_ctrl) |
876 | else | 896 | g->ops.ltc.cbc_ctrl( |
877 | clear_ctags = true; | 897 | g, gk20a_cbc_op_clear, |
898 | comptags.offset, | ||
899 | (comptags.offset + | ||
900 | comptags.lines - 1)); | ||
901 | else | ||
902 | /* | ||
903 | * The comptags will be cleared as part | ||
904 | * of mapping (vgpu) | ||
905 | */ | ||
906 | clear_ctags = true; | ||
907 | } | ||
878 | } | 908 | } |
909 | |||
910 | /* | ||
911 | * Store the ctag offset for later use if we got the comptags | ||
912 | */ | ||
913 | if (comptags.lines) | ||
914 | ctag_offset = comptags.offset; | ||
879 | } | 915 | } |
880 | 916 | ||
881 | /* | 917 | /* |
882 | * Calculate comptag index for this mapping. Differs in case of partial | 918 | * Figure out the kind and ctag offset for the GMMU page tables |
883 | * mapping. | ||
884 | */ | 919 | */ |
885 | ctag_offset = comptags.offset; | 920 | if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) { |
886 | if (ctag_offset) | 921 | /* |
922 | * Adjust the ctag_offset as per the buffer map offset | ||
923 | */ | ||
887 | ctag_offset += phys_offset >> | 924 | ctag_offset += phys_offset >> |
888 | ilog2(g->ops.fb.compression_page_size(g)); | 925 | ilog2(g->ops.fb.compression_page_size(g)); |
926 | pte_kind = binfo.compr_kind; | ||
927 | } else if (binfo.incompr_kind != NV_KIND_INVALID) { | ||
928 | /* | ||
929 | * Incompressible kind, ctag offset will not be programmed | ||
930 | */ | ||
931 | ctag_offset = 0; | ||
932 | pte_kind = binfo.incompr_kind; | ||
933 | } else { | ||
934 | /* | ||
935 | * Caller required compression, but we cannot provide it | ||
936 | */ | ||
937 | nvgpu_err(g, "No comptags and no incompressible fallback kind"); | ||
938 | err = -ENOMEM; | ||
939 | goto clean_up; | ||
940 | } | ||
889 | 941 | ||
890 | map_addr = g->ops.mm.gmmu_map(vm, | 942 | map_addr = g->ops.mm.gmmu_map(vm, |
891 | map_addr, | 943 | map_addr, |
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
893 | phys_offset, | 945 | phys_offset, |
894 | map_size, | 946 | map_size, |
895 | binfo.pgsz_idx, | 947 | binfo.pgsz_idx, |
896 | binfo.compr_kind != NV_KIND_INVALID ? | 948 | pte_kind, |
897 | binfo.compr_kind : binfo.incompr_kind, | ||
898 | ctag_offset, | 949 | ctag_offset, |
899 | flags, | 950 | flags, |
900 | rw, | 951 | rw, |
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
913 | mapped_buffer->addr = map_addr; | 964 | mapped_buffer->addr = map_addr; |
914 | mapped_buffer->size = map_size; | 965 | mapped_buffer->size = map_size; |
915 | mapped_buffer->pgsz_idx = binfo.pgsz_idx; | 966 | mapped_buffer->pgsz_idx = binfo.pgsz_idx; |
916 | mapped_buffer->ctag_offset = ctag_offset; | ||
917 | mapped_buffer->ctag_lines = binfo.ctag_lines; | ||
918 | mapped_buffer->ctag_allocated_lines = comptags.allocated_lines; | ||
919 | mapped_buffer->vm = vm; | 967 | mapped_buffer->vm = vm; |
920 | mapped_buffer->flags = flags; | 968 | mapped_buffer->flags = flags; |
921 | mapped_buffer->kind = map_key_kind; | 969 | mapped_buffer->kind = map_key_kind; |
@@ -1074,8 +1122,8 @@ done: | |||
1074 | return; | 1122 | return; |
1075 | } | 1123 | } |
1076 | 1124 | ||
1077 | int nvgpu_vm_compute_compression(struct vm_gk20a *vm, | 1125 | static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, |
1078 | struct nvgpu_ctag_buffer_info *binfo) | 1126 | struct nvgpu_ctag_buffer_info *binfo) |
1079 | { | 1127 | { |
1080 | bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID); | 1128 | bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID); |
1081 | struct gk20a *g = gk20a_from_vm(vm); | 1129 | struct gk20a *g = gk20a_from_vm(vm); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/comptags.h b/drivers/gpu/nvgpu/include/nvgpu/comptags.h index 5482d0ce..2d9f034a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/comptags.h +++ b/drivers/gpu/nvgpu/include/nvgpu/comptags.h | |||
@@ -25,8 +25,14 @@ struct nvgpu_os_buffer; | |||
25 | struct gk20a_comptags { | 25 | struct gk20a_comptags { |
26 | u32 offset; | 26 | u32 offset; |
27 | u32 lines; | 27 | u32 lines; |
28 | u32 allocated_lines; | 28 | |
29 | bool user_mappable; | 29 | /* |
30 | * This signals whether allocation has been attempted. Observe | ||
31 | * 'lines' to see whether comptags were actually allocated. We | ||
32 | * try alloc only once per buffer in order not to break | ||
33 | * multiple compressible-kind mappings. | ||
34 | */ | ||
35 | bool allocated; | ||
30 | }; | 36 | }; |
31 | 37 | ||
32 | struct gk20a_comptag_allocator { | 38 | struct gk20a_comptag_allocator { |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index abb297ab..c0a4124c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h | |||
@@ -110,9 +110,6 @@ struct nvgpu_mapped_buf { | |||
110 | u64 size; | 110 | u64 size; |
111 | 111 | ||
112 | u32 pgsz_idx; | 112 | u32 pgsz_idx; |
113 | u32 ctag_offset; | ||
114 | u32 ctag_lines; | ||
115 | u32 ctag_allocated_lines; | ||
116 | 113 | ||
117 | u32 flags; | 114 | u32 flags; |
118 | u32 kind; | 115 | u32 kind; |
@@ -143,17 +140,6 @@ mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node) | |||
143 | ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node)); | 140 | ((uintptr_t)node - offsetof(struct nvgpu_mapped_buf, node)); |
144 | } | 141 | } |
145 | 142 | ||
146 | struct nvgpu_ctag_buffer_info { | ||
147 | u64 size; | ||
148 | enum gmmu_pgsz_gk20a pgsz_idx; | ||
149 | u32 flags; | ||
150 | |||
151 | s16 compr_kind; | ||
152 | s16 incompr_kind; | ||
153 | |||
154 | u32 ctag_lines; | ||
155 | }; | ||
156 | |||
157 | struct vm_gk20a { | 143 | struct vm_gk20a { |
158 | struct mm_gk20a *mm; | 144 | struct mm_gk20a *mm; |
159 | struct gk20a_as_share *as_share; /* as_share this represents */ | 145 | struct gk20a_as_share *as_share; /* as_share this represents */ |
@@ -221,9 +207,6 @@ void nvgpu_vm_put(struct vm_gk20a *vm); | |||
221 | int vm_aspace_id(struct vm_gk20a *vm); | 207 | int vm_aspace_id(struct vm_gk20a *vm); |
222 | int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); | 208 | int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size); |
223 | 209 | ||
224 | int nvgpu_vm_compute_compression(struct vm_gk20a *vm, | ||
225 | struct nvgpu_ctag_buffer_info *binfo); | ||
226 | |||
227 | /* batching eliminates redundant cache flushes and invalidates */ | 210 | /* batching eliminates redundant cache flushes and invalidates */ |
228 | void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); | 211 | void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); |
229 | void nvgpu_vm_mapping_batch_finish( | 212 | void nvgpu_vm_mapping_batch_finish( |