diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2017-11-08 13:55:47 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-15 16:25:58 -0500 |
commit | 434385ca54053b13ac06a5f11cb7564d6740f02d (patch) | |
tree | 04cbfac9575af77c90e11da0fd8488fa221f2581 /drivers/gpu/nvgpu/common | |
parent | 3ff666c4b97c5ad869aefe0d9c335c187962c20e (diff) |
gpu: nvgpu: Clean up comptag data structs and alloc
Clean up the comptag-related data structures and allocation logic. The
most important change is that we only ever try comptag allocation once
to prevent incorrect map aliasing.
If we were to retry the allocation on further map calls, the following
situation would become possible:
(1) Request a compressible kind mapping for a buffer. Comptag alloc fails
and we proceed with the incompressible kind fallback.
(2) Request another compressible kind mapping for the buffer. The comptag
alloc retry succeeds and now we use the compressible kind.
(3) After writes through the compressible kind mapping, the buffer is no
longer readable via the fallback incompressible kind mapping.
The other changes are about removing the unused comptag-related fields
in gk20a_comptags and nvgpu_mapped_buf, and retrieving comptags info
only for compressible buffers. We also make nvgpu_ctag_buffer_info and
nvgpu_vm_compute_compression private mm/vm.c definitions, since
they're not used elsewhere.
Bug 1902982
Change-Id: I0c9fe48ccc585a80dd2c05ec606a079c1c1d41f1
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1595153
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/comptags.c | 25 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dmabuf.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/vm.c | 136 |
4 files changed, 110 insertions, 58 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/comptags.c b/drivers/gpu/nvgpu/common/linux/comptags.c index f55989f7..92e8aa3e 100644 --- a/drivers/gpu/nvgpu/common/linux/comptags.c +++ b/drivers/gpu/nvgpu/common/linux/comptags.c | |||
@@ -46,7 +46,6 @@ int gk20a_alloc_comptags(struct gk20a *g, | |||
46 | { | 46 | { |
47 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, | 47 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf, |
48 | buf->dev); | 48 | buf->dev); |
49 | u32 ctaglines_allocsize; | ||
50 | u32 offset; | 49 | u32 offset; |
51 | int err; | 50 | int err; |
52 | 51 | ||
@@ -56,18 +55,24 @@ int gk20a_alloc_comptags(struct gk20a *g, | |||
56 | if (!lines) | 55 | if (!lines) |
57 | return -EINVAL; | 56 | return -EINVAL; |
58 | 57 | ||
59 | ctaglines_allocsize = lines; | ||
60 | |||
61 | /* store the allocator so we can use it when we free the ctags */ | 58 | /* store the allocator so we can use it when we free the ctags */ |
62 | priv->comptag_allocator = allocator; | 59 | priv->comptag_allocator = allocator; |
63 | err = gk20a_comptaglines_alloc(allocator, &offset, | 60 | err = gk20a_comptaglines_alloc(allocator, &offset, lines); |
64 | ctaglines_allocsize); | 61 | if (!err) { |
65 | if (err) | 62 | priv->comptags.offset = offset; |
66 | return err; | 63 | priv->comptags.lines = lines; |
64 | } else { | ||
65 | priv->comptags.offset = 0; | ||
66 | priv->comptags.lines = 0; | ||
67 | } | ||
67 | 68 | ||
68 | priv->comptags.offset = offset; | 69 | /* |
69 | priv->comptags.lines = lines; | 70 | * We don't report an error here if comptag alloc failed. The |
70 | priv->comptags.allocated_lines = ctaglines_allocsize; | 71 | * caller will simply fallback to incompressible kinds. It |
72 | * would not be safe to re-allocate comptags anyways on | ||
73 | * successive calls, as that would break map aliasing. | ||
74 | */ | ||
75 | priv->comptags.allocated = true; | ||
71 | 76 | ||
72 | return 0; | 77 | return 0; |
73 | } | 78 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/dmabuf.c b/drivers/gpu/nvgpu/common/linux/dmabuf.c index 6b44ff55..08cf5f2b 100644 --- a/drivers/gpu/nvgpu/common/linux/dmabuf.c +++ b/drivers/gpu/nvgpu/common/linux/dmabuf.c | |||
@@ -41,11 +41,11 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
41 | 41 | ||
42 | g = priv->g; | 42 | g = priv->g; |
43 | 43 | ||
44 | if (priv->comptags.lines) { | 44 | if (priv->comptags.allocated && priv->comptags.lines) { |
45 | BUG_ON(!priv->comptag_allocator); | 45 | BUG_ON(!priv->comptag_allocator); |
46 | gk20a_comptaglines_free(priv->comptag_allocator, | 46 | gk20a_comptaglines_free(priv->comptag_allocator, |
47 | priv->comptags.offset, | 47 | priv->comptags.offset, |
48 | priv->comptags.allocated_lines); | 48 | priv->comptags.lines); |
49 | } | 49 | } |
50 | 50 | ||
51 | /* Free buffer states */ | 51 | /* Free buffer states */ |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index d6d86c94..8e464627 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -132,7 +132,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
132 | nvgpu_log(g, gpu_dbg_map, | 132 | nvgpu_log(g, gpu_dbg_map, |
133 | "gv: 0x%04x_%08x + 0x%-7zu " | 133 | "gv: 0x%04x_%08x + 0x%-7zu " |
134 | "[dma: 0x%010llx, pa: 0x%010llx] " | 134 | "[dma: 0x%010llx, pa: 0x%010llx] " |
135 | "pgsz=%-3dKb as=%-2d ctags=%d start=%d " | 135 | "pgsz=%-3dKb as=%-2d " |
136 | "flags=0x%x apt=%s (reused)", | 136 | "flags=0x%x apt=%s (reused)", |
137 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), | 137 | u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr), |
138 | os_buf->dmabuf->size, | 138 | os_buf->dmabuf->size, |
@@ -140,7 +140,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
140 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), | 140 | (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl), |
141 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 141 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
142 | vm_aspace_id(vm), | 142 | vm_aspace_id(vm), |
143 | mapped_buffer->ctag_lines, mapped_buffer->ctag_offset, | ||
144 | mapped_buffer->flags, | 143 | mapped_buffer->flags, |
145 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | 144 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); |
146 | 145 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index cfac4f8e..be7e4207 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c | |||
@@ -39,6 +39,20 @@ | |||
39 | #include "gk20a/gk20a.h" | 39 | #include "gk20a/gk20a.h" |
40 | #include "gk20a/mm_gk20a.h" | 40 | #include "gk20a/mm_gk20a.h" |
41 | 41 | ||
42 | struct nvgpu_ctag_buffer_info { | ||
43 | u64 size; | ||
44 | enum gmmu_pgsz_gk20a pgsz_idx; | ||
45 | u32 flags; | ||
46 | |||
47 | s16 compr_kind; | ||
48 | s16 incompr_kind; | ||
49 | |||
50 | u32 ctag_lines; | ||
51 | }; | ||
52 | |||
53 | static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, | ||
54 | struct nvgpu_ctag_buffer_info *binfo); | ||
55 | |||
42 | static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, | 56 | static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer, |
43 | struct vm_gk20a_mapping_batch *batch); | 57 | struct vm_gk20a_mapping_batch *batch); |
44 | 58 | ||
@@ -731,11 +745,10 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
731 | struct gk20a *g = gk20a_from_vm(vm); | 745 | struct gk20a *g = gk20a_from_vm(vm); |
732 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | 746 | struct nvgpu_mapped_buf *mapped_buffer = NULL; |
733 | struct nvgpu_ctag_buffer_info binfo = { 0 }; | 747 | struct nvgpu_ctag_buffer_info binfo = { 0 }; |
734 | struct gk20a_comptags comptags; | ||
735 | struct nvgpu_vm_area *vm_area = NULL; | 748 | struct nvgpu_vm_area *vm_area = NULL; |
736 | int err = 0; | 749 | int err = 0; |
737 | u64 align; | 750 | u64 align; |
738 | u32 ctag_offset; | 751 | u32 ctag_offset = 0; |
739 | bool clear_ctags = false; | 752 | bool clear_ctags = false; |
740 | bool va_allocated = true; | 753 | bool va_allocated = true; |
741 | 754 | ||
@@ -746,6 +759,11 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
746 | */ | 759 | */ |
747 | s16 map_key_kind; | 760 | s16 map_key_kind; |
748 | 761 | ||
762 | /* | ||
763 | * The actual GMMU PTE kind | ||
764 | */ | ||
765 | u8 pte_kind; | ||
766 | |||
749 | if (vm->userspace_managed && | 767 | if (vm->userspace_managed && |
750 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { | 768 | !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) { |
751 | nvgpu_err(g, | 769 | nvgpu_err(g, |
@@ -835,57 +853,91 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
835 | if (!vm->enable_ctag) | 853 | if (!vm->enable_ctag) |
836 | binfo.ctag_lines = 0; | 854 | binfo.ctag_lines = 0; |
837 | 855 | ||
838 | gk20a_get_comptags(os_buf, &comptags); | 856 | if (binfo.ctag_lines) { |
857 | struct gk20a_comptags comptags = { 0 }; | ||
839 | 858 | ||
840 | if (binfo.ctag_lines && !comptags.lines) { | ||
841 | /* | 859 | /* |
842 | * Allocate compression resources if needed. | 860 | * Get the comptags state |
843 | */ | 861 | */ |
844 | if (gk20a_alloc_comptags(g, | 862 | gk20a_get_comptags(os_buf, &comptags); |
845 | os_buf, | ||
846 | &g->gr.comp_tags, | ||
847 | binfo.ctag_lines)) { | ||
848 | |||
849 | /* | ||
850 | * Prevent compression... | ||
851 | */ | ||
852 | binfo.compr_kind = NV_KIND_INVALID; | ||
853 | |||
854 | /* | ||
855 | * ... And make sure we have a fallback. | ||
856 | */ | ||
857 | if (binfo.incompr_kind == NV_KIND_INVALID) { | ||
858 | nvgpu_err(g, "comptag alloc failed and no " | ||
859 | "fallback kind specified"); | ||
860 | err = -ENOMEM; | ||
861 | 863 | ||
864 | /* | ||
865 | * Allocate if not yet allocated | ||
866 | */ | ||
867 | if (!comptags.allocated) { | ||
868 | err = gk20a_alloc_comptags(g, os_buf, | ||
869 | &g->gr.comp_tags, | ||
870 | binfo.ctag_lines); | ||
871 | if (err) { | ||
862 | /* | 872 | /* |
863 | * Any alloced comptags are cleaned up when the | 873 | * This is an irrecoverable failure and we need |
864 | * dmabuf is freed. | 874 | * to abort. In particular, it is not safe to |
875 | * proceed with incompressible fallback, since | ||
876 | * we could not mark our alloc failure | ||
877 | * anywere. Later we would retry allocation and | ||
878 | * break compressible map aliasing. | ||
865 | */ | 879 | */ |
880 | nvgpu_err(g, | ||
881 | "Error %d setting up comptags", err); | ||
866 | goto clean_up; | 882 | goto clean_up; |
867 | } | 883 | } |
868 | } else { | 884 | |
885 | /* | ||
886 | * Refresh comptags state after alloc. Field | ||
887 | * comptags.lines will be 0 if alloc failed. | ||
888 | */ | ||
869 | gk20a_get_comptags(os_buf, &comptags); | 889 | gk20a_get_comptags(os_buf, &comptags); |
870 | 890 | ||
871 | if (g->ops.ltc.cbc_ctrl) | 891 | /* |
872 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | 892 | * Newly allocated comptags needs to be cleared |
873 | comptags.offset, | 893 | */ |
874 | comptags.offset + | 894 | if (comptags.lines) { |
875 | comptags.allocated_lines - 1); | 895 | if (g->ops.ltc.cbc_ctrl) |
876 | else | 896 | g->ops.ltc.cbc_ctrl( |
877 | clear_ctags = true; | 897 | g, gk20a_cbc_op_clear, |
898 | comptags.offset, | ||
899 | (comptags.offset + | ||
900 | comptags.lines - 1)); | ||
901 | else | ||
902 | /* | ||
903 | * The comptags will be cleared as part | ||
904 | * of mapping (vgpu) | ||
905 | */ | ||
906 | clear_ctags = true; | ||
907 | } | ||
878 | } | 908 | } |
909 | |||
910 | /* | ||
911 | * Store the ctag offset for later use if we got the comptags | ||
912 | */ | ||
913 | if (comptags.lines) | ||
914 | ctag_offset = comptags.offset; | ||
879 | } | 915 | } |
880 | 916 | ||
881 | /* | 917 | /* |
882 | * Calculate comptag index for this mapping. Differs in case of partial | 918 | * Figure out the kind and ctag offset for the GMMU page tables |
883 | * mapping. | ||
884 | */ | 919 | */ |
885 | ctag_offset = comptags.offset; | 920 | if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) { |
886 | if (ctag_offset) | 921 | /* |
922 | * Adjust the ctag_offset as per the buffer map offset | ||
923 | */ | ||
887 | ctag_offset += phys_offset >> | 924 | ctag_offset += phys_offset >> |
888 | ilog2(g->ops.fb.compression_page_size(g)); | 925 | ilog2(g->ops.fb.compression_page_size(g)); |
926 | pte_kind = binfo.compr_kind; | ||
927 | } else if (binfo.incompr_kind != NV_KIND_INVALID) { | ||
928 | /* | ||
929 | * Incompressible kind, ctag offset will not be programmed | ||
930 | */ | ||
931 | ctag_offset = 0; | ||
932 | pte_kind = binfo.incompr_kind; | ||
933 | } else { | ||
934 | /* | ||
935 | * Caller required compression, but we cannot provide it | ||
936 | */ | ||
937 | nvgpu_err(g, "No comptags and no incompressible fallback kind"); | ||
938 | err = -ENOMEM; | ||
939 | goto clean_up; | ||
940 | } | ||
889 | 941 | ||
890 | map_addr = g->ops.mm.gmmu_map(vm, | 942 | map_addr = g->ops.mm.gmmu_map(vm, |
891 | map_addr, | 943 | map_addr, |
@@ -893,8 +945,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
893 | phys_offset, | 945 | phys_offset, |
894 | map_size, | 946 | map_size, |
895 | binfo.pgsz_idx, | 947 | binfo.pgsz_idx, |
896 | binfo.compr_kind != NV_KIND_INVALID ? | 948 | pte_kind, |
897 | binfo.compr_kind : binfo.incompr_kind, | ||
898 | ctag_offset, | 949 | ctag_offset, |
899 | flags, | 950 | flags, |
900 | rw, | 951 | rw, |
@@ -913,9 +964,6 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, | |||
913 | mapped_buffer->addr = map_addr; | 964 | mapped_buffer->addr = map_addr; |
914 | mapped_buffer->size = map_size; | 965 | mapped_buffer->size = map_size; |
915 | mapped_buffer->pgsz_idx = binfo.pgsz_idx; | 966 | mapped_buffer->pgsz_idx = binfo.pgsz_idx; |
916 | mapped_buffer->ctag_offset = ctag_offset; | ||
917 | mapped_buffer->ctag_lines = binfo.ctag_lines; | ||
918 | mapped_buffer->ctag_allocated_lines = comptags.allocated_lines; | ||
919 | mapped_buffer->vm = vm; | 967 | mapped_buffer->vm = vm; |
920 | mapped_buffer->flags = flags; | 968 | mapped_buffer->flags = flags; |
921 | mapped_buffer->kind = map_key_kind; | 969 | mapped_buffer->kind = map_key_kind; |
@@ -1074,8 +1122,8 @@ done: | |||
1074 | return; | 1122 | return; |
1075 | } | 1123 | } |
1076 | 1124 | ||
1077 | int nvgpu_vm_compute_compression(struct vm_gk20a *vm, | 1125 | static int nvgpu_vm_compute_compression(struct vm_gk20a *vm, |
1078 | struct nvgpu_ctag_buffer_info *binfo) | 1126 | struct nvgpu_ctag_buffer_info *binfo) |
1079 | { | 1127 | { |
1080 | bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID); | 1128 | bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID); |
1081 | struct gk20a *g = gk20a_from_vm(vm); | 1129 | struct gk20a *g = gk20a_from_vm(vm); |