diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-05-11 13:25:47 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-06 20:09:22 -0400 |
commit | 048c6b062ae381a329dccbc7ca0599113dbd7417 (patch) | |
tree | 24712fcaf967e22bd91bcb6a81195cf79ac08cc1 /drivers/gpu/nvgpu/gk20a | |
parent | c21f5bca9ae81804130e30ea3e6f7a18d51203dc (diff) |
gpu: nvgpu: Separate GMMU mapping impl from mm_gk20a.c
Separate the non-chip-specific GMMU mapping implementation code
out of mm_gk20a.c. This puts all of the chip-agnostic code into
common/mm/gmmu.c in preparation for rewriting it.
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: I6f7fdac3422703f5e80bb22ad304dc27bba4814d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1480228
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 521 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 |
2 files changed, 0 insertions, 528 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index a1873a30..e7bcf6f0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -124,15 +124,6 @@ struct nvgpu_page_alloc *get_vidmem_page_alloc(struct scatterlist *sgl) | |||
124 | * | 124 | * |
125 | */ | 125 | */ |
126 | 126 | ||
127 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
128 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
129 | struct sg_table *sgt, u64 buffer_offset, | ||
130 | u64 first_vaddr, u64 last_vaddr, | ||
131 | u8 kind_v, u32 ctag_offset, bool cacheable, | ||
132 | bool umapped_pte, int rw_flag, | ||
133 | bool sparse, | ||
134 | bool priv, | ||
135 | enum nvgpu_aperture aperture); | ||
136 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | 127 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); |
137 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | 128 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); |
138 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | 129 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); |
@@ -781,104 +772,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g) | |||
781 | #endif | 772 | #endif |
782 | } | 773 | } |
783 | 774 | ||
784 | static void free_gmmu_phys_pages(struct vm_gk20a *vm, | ||
785 | struct gk20a_mm_entry *entry) | ||
786 | { | ||
787 | gk20a_dbg_fn(""); | ||
788 | |||
789 | /* note: mem_desc slightly abused (wrt. free_gmmu_pages) */ | ||
790 | |||
791 | free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size)); | ||
792 | entry->mem.cpu_va = NULL; | ||
793 | |||
794 | sg_free_table(entry->mem.priv.sgt); | ||
795 | nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); | ||
796 | entry->mem.priv.sgt = NULL; | ||
797 | entry->mem.size = 0; | ||
798 | entry->mem.aperture = APERTURE_INVALID; | ||
799 | } | ||
800 | |||
801 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
802 | { | ||
803 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
804 | sg_phys(entry->mem.priv.sgt->sgl), | ||
805 | entry->mem.priv.sgt->sgl->length); | ||
806 | return 0; | ||
807 | } | ||
808 | |||
809 | static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry) | ||
810 | { | ||
811 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
812 | sg_phys(entry->mem.priv.sgt->sgl), | ||
813 | entry->mem.priv.sgt->sgl->length); | ||
814 | } | ||
815 | |||
816 | void free_gmmu_pages(struct vm_gk20a *vm, | ||
817 | struct gk20a_mm_entry *entry) | ||
818 | { | ||
819 | struct gk20a *g = gk20a_from_vm(vm); | ||
820 | |||
821 | gk20a_dbg_fn(""); | ||
822 | |||
823 | if (!entry->mem.size) | ||
824 | return; | ||
825 | |||
826 | if (entry->woffset) /* fake shadow mem */ | ||
827 | return; | ||
828 | |||
829 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
830 | free_gmmu_phys_pages(vm, entry); | ||
831 | return; | ||
832 | } | ||
833 | |||
834 | nvgpu_dma_free(g, &entry->mem); | ||
835 | } | ||
836 | |||
837 | int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
838 | { | ||
839 | gk20a_dbg_fn(""); | ||
840 | |||
841 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
842 | return map_gmmu_phys_pages(entry); | ||
843 | |||
844 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
845 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
846 | return 0; | ||
847 | |||
848 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
849 | sg_phys(entry->mem.priv.sgt->sgl), | ||
850 | entry->mem.size); | ||
851 | } else { | ||
852 | int err = nvgpu_mem_begin(g, &entry->mem); | ||
853 | |||
854 | if (err) | ||
855 | return err; | ||
856 | } | ||
857 | |||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) | ||
862 | { | ||
863 | gk20a_dbg_fn(""); | ||
864 | |||
865 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { | ||
866 | unmap_gmmu_phys_pages(entry); | ||
867 | return; | ||
868 | } | ||
869 | |||
870 | if (IS_ENABLED(CONFIG_ARM64)) { | ||
871 | if (entry->mem.aperture == APERTURE_VIDMEM) | ||
872 | return; | ||
873 | |||
874 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
875 | sg_phys(entry->mem.priv.sgt->sgl), | ||
876 | entry->mem.size); | ||
877 | } else { | ||
878 | nvgpu_mem_end(g, &entry->mem); | ||
879 | } | ||
880 | } | ||
881 | |||
882 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) | 775 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) |
883 | { | 776 | { |
884 | return vm->mmu_levels[0].lo_bit[0]; | 777 | return vm->mmu_levels[0].lo_bit[0]; |
@@ -909,21 +802,6 @@ static u32 pte_from_index(u32 i) | |||
909 | return i * gmmu_pte__size_v() / sizeof(u32); | 802 | return i * gmmu_pte__size_v() / sizeof(u32); |
910 | } | 803 | } |
911 | 804 | ||
912 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
913 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) | ||
914 | { | ||
915 | u32 ret; | ||
916 | /* mask off pde part */ | ||
917 | addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL); | ||
918 | |||
919 | /* shift over to get pte index. note assumption that pte index | ||
920 | * doesn't leak over into the high 32b */ | ||
921 | ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx])); | ||
922 | |||
923 | gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret); | ||
924 | return ret; | ||
925 | } | ||
926 | |||
927 | int nvgpu_vm_get_buffers(struct vm_gk20a *vm, | 805 | int nvgpu_vm_get_buffers(struct vm_gk20a *vm, |
928 | struct nvgpu_mapped_buf ***mapped_buffers, | 806 | struct nvgpu_mapped_buf ***mapped_buffers, |
929 | int *num_buffers) | 807 | int *num_buffers) |
@@ -1096,141 +974,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
1096 | return 0; | 974 | return 0; |
1097 | } | 975 | } |
1098 | 976 | ||
1099 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | ||
1100 | u64 map_offset, | ||
1101 | struct sg_table *sgt, | ||
1102 | u64 buffer_offset, | ||
1103 | u64 size, | ||
1104 | int pgsz_idx, | ||
1105 | u8 kind_v, | ||
1106 | u32 ctag_offset, | ||
1107 | u32 flags, | ||
1108 | int rw_flag, | ||
1109 | bool clear_ctags, | ||
1110 | bool sparse, | ||
1111 | bool priv, | ||
1112 | struct vm_gk20a_mapping_batch *batch, | ||
1113 | enum nvgpu_aperture aperture) | ||
1114 | { | ||
1115 | int err = 0; | ||
1116 | bool allocated = false; | ||
1117 | struct gk20a *g = gk20a_from_vm(vm); | ||
1118 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1119 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); | ||
1120 | |||
1121 | /* Allocate (or validate when map_offset != 0) the virtual address. */ | ||
1122 | if (!map_offset) { | ||
1123 | map_offset = __nvgpu_vm_alloc_va(vm, size, | ||
1124 | pgsz_idx); | ||
1125 | if (!map_offset) { | ||
1126 | nvgpu_err(g, "failed to allocate va space"); | ||
1127 | err = -ENOMEM; | ||
1128 | goto fail_alloc; | ||
1129 | } | ||
1130 | allocated = true; | ||
1131 | } | ||
1132 | |||
1133 | gk20a_dbg(gpu_dbg_map, | ||
1134 | "gv: 0x%04x_%08x + 0x%-7llx " | ||
1135 | "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] " | ||
1136 | "pgsz=%-3dKb as=%-2d ctags=%d start=%d " | ||
1137 | "kind=0x%x flags=0x%x apt=%s", | ||
1138 | u64_hi32(map_offset), u64_lo32(map_offset), size, | ||
1139 | sgt ? u64_hi32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
1140 | sgt ? u64_lo32((u64)sg_dma_address(sgt->sgl)) : 0, | ||
1141 | sgt ? u64_hi32((u64)sg_phys(sgt->sgl)) : 0, | ||
1142 | sgt ? u64_lo32((u64)sg_phys(sgt->sgl)) : 0, | ||
1143 | vm->gmmu_page_sizes[pgsz_idx] >> 10, vm_aspace_id(vm), | ||
1144 | ctag_lines, ctag_offset, | ||
1145 | kind_v, flags, nvgpu_aperture_str(aperture)); | ||
1146 | |||
1147 | err = update_gmmu_ptes_locked(vm, pgsz_idx, | ||
1148 | sgt, | ||
1149 | buffer_offset, | ||
1150 | map_offset, map_offset + size, | ||
1151 | kind_v, | ||
1152 | ctag_offset, | ||
1153 | flags & | ||
1154 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1155 | flags & | ||
1156 | NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE, | ||
1157 | rw_flag, | ||
1158 | sparse, | ||
1159 | priv, | ||
1160 | aperture); | ||
1161 | if (err) { | ||
1162 | nvgpu_err(g, "failed to update ptes on map"); | ||
1163 | goto fail_validate; | ||
1164 | } | ||
1165 | |||
1166 | if (!batch) | ||
1167 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
1168 | else | ||
1169 | batch->need_tlb_invalidate = true; | ||
1170 | |||
1171 | return map_offset; | ||
1172 | fail_validate: | ||
1173 | if (allocated) | ||
1174 | __nvgpu_vm_free_va(vm, map_offset, pgsz_idx); | ||
1175 | fail_alloc: | ||
1176 | nvgpu_err(g, "%s: failed with err=%d", __func__, err); | ||
1177 | return 0; | ||
1178 | } | ||
1179 | |||
1180 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | ||
1181 | u64 vaddr, | ||
1182 | u64 size, | ||
1183 | int pgsz_idx, | ||
1184 | bool va_allocated, | ||
1185 | int rw_flag, | ||
1186 | bool sparse, | ||
1187 | struct vm_gk20a_mapping_batch *batch) | ||
1188 | { | ||
1189 | int err = 0; | ||
1190 | struct gk20a *g = gk20a_from_vm(vm); | ||
1191 | |||
1192 | if (va_allocated) { | ||
1193 | err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); | ||
1194 | if (err) { | ||
1195 | nvgpu_err(g, "failed to free va"); | ||
1196 | return; | ||
1197 | } | ||
1198 | } | ||
1199 | |||
1200 | /* unmap here needs to know the page size we assigned at mapping */ | ||
1201 | err = update_gmmu_ptes_locked(vm, | ||
1202 | pgsz_idx, | ||
1203 | NULL, /* n/a for unmap */ | ||
1204 | 0, | ||
1205 | vaddr, | ||
1206 | vaddr + size, | ||
1207 | 0, 0, false /* n/a for unmap */, | ||
1208 | false, rw_flag, | ||
1209 | sparse, 0, | ||
1210 | APERTURE_INVALID); /* don't care for unmap */ | ||
1211 | if (err) | ||
1212 | nvgpu_err(g, "failed to update gmmu ptes on unmap"); | ||
1213 | |||
1214 | /* flush l2 so any dirty lines are written out *now*. | ||
1215 | * also as we could potentially be switching this buffer | ||
1216 | * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at | ||
1217 | * some point in the future we need to invalidate l2. e.g. switching | ||
1218 | * from a render buffer unmap (here) to later using the same memory | ||
1219 | * for gmmu ptes. note the positioning of this relative to any smmu | ||
1220 | * unmapping (below). */ | ||
1221 | |||
1222 | if (!batch) { | ||
1223 | gk20a_mm_l2_flush(g, true); | ||
1224 | g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); | ||
1225 | } else { | ||
1226 | if (!batch->gpu_l2_flushed) { | ||
1227 | gk20a_mm_l2_flush(g, true); | ||
1228 | batch->gpu_l2_flushed = true; | ||
1229 | } | ||
1230 | batch->need_tlb_invalidate = true; | ||
1231 | } | ||
1232 | } | ||
1233 | |||
1234 | enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, | 977 | enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, |
1235 | struct dma_buf *dmabuf) | 978 | struct dma_buf *dmabuf) |
1236 | { | 979 | { |
@@ -2036,254 +1779,6 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2036 | return 0; | 1779 | return 0; |
2037 | } | 1780 | } |
2038 | 1781 | ||
2039 | static int update_gmmu_level_locked(struct vm_gk20a *vm, | ||
2040 | struct gk20a_mm_entry *pte, | ||
2041 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
2042 | struct scatterlist **sgl, | ||
2043 | u64 *offset, | ||
2044 | u64 *iova, | ||
2045 | u64 gpu_va, u64 gpu_end, | ||
2046 | u8 kind_v, u64 *ctag, | ||
2047 | bool cacheable, bool unmapped_pte, | ||
2048 | int rw_flag, | ||
2049 | bool sparse, | ||
2050 | int lvl, | ||
2051 | bool priv, | ||
2052 | enum nvgpu_aperture aperture) | ||
2053 | { | ||
2054 | struct gk20a *g = gk20a_from_vm(vm); | ||
2055 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
2056 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; | ||
2057 | int err = 0; | ||
2058 | u32 pde_i; | ||
2059 | u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx]; | ||
2060 | struct gk20a_mm_entry *next_pte = NULL, *prev_pte = NULL; | ||
2061 | |||
2062 | gk20a_dbg_fn(""); | ||
2063 | |||
2064 | pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL)) | ||
2065 | >> (u64)l->lo_bit[pgsz_idx]; | ||
2066 | |||
2067 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx", | ||
2068 | pgsz_idx, lvl, gpu_va, gpu_end-1, *iova); | ||
2069 | |||
2070 | while (gpu_va < gpu_end) { | ||
2071 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); | ||
2072 | |||
2073 | /* Allocate next level */ | ||
2074 | if (next_l->update_entry) { | ||
2075 | if (!pte->entries) { | ||
2076 | int num_entries = | ||
2077 | 1 << | ||
2078 | (l->hi_bit[pgsz_idx] | ||
2079 | - l->lo_bit[pgsz_idx] + 1); | ||
2080 | pte->entries = | ||
2081 | nvgpu_vzalloc(g, | ||
2082 | sizeof(struct gk20a_mm_entry) * | ||
2083 | num_entries); | ||
2084 | if (!pte->entries) | ||
2085 | return -ENOMEM; | ||
2086 | pte->pgsz = pgsz_idx; | ||
2087 | pte->num_entries = num_entries; | ||
2088 | } | ||
2089 | prev_pte = next_pte; | ||
2090 | next_pte = pte->entries + pde_i; | ||
2091 | |||
2092 | if (!next_pte->mem.size) { | ||
2093 | err = nvgpu_zalloc_gmmu_page_table(vm, | ||
2094 | pgsz_idx, next_l, next_pte, prev_pte); | ||
2095 | if (err) | ||
2096 | return err; | ||
2097 | } | ||
2098 | } | ||
2099 | |||
2100 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
2101 | sgl, offset, iova, | ||
2102 | kind_v, ctag, cacheable, unmapped_pte, | ||
2103 | rw_flag, sparse, priv, aperture); | ||
2104 | if (err) | ||
2105 | return err; | ||
2106 | |||
2107 | if (next_l->update_entry) { | ||
2108 | /* get cpu access to the ptes */ | ||
2109 | err = map_gmmu_pages(g, next_pte); | ||
2110 | if (err) { | ||
2111 | nvgpu_err(g, | ||
2112 | "couldn't map ptes for update as=%d", | ||
2113 | vm_aspace_id(vm)); | ||
2114 | return err; | ||
2115 | } | ||
2116 | err = update_gmmu_level_locked(vm, next_pte, | ||
2117 | pgsz_idx, | ||
2118 | sgl, | ||
2119 | offset, | ||
2120 | iova, | ||
2121 | gpu_va, | ||
2122 | next, | ||
2123 | kind_v, ctag, cacheable, unmapped_pte, | ||
2124 | rw_flag, sparse, lvl+1, priv, aperture); | ||
2125 | unmap_gmmu_pages(g, next_pte); | ||
2126 | |||
2127 | if (err) | ||
2128 | return err; | ||
2129 | } | ||
2130 | |||
2131 | pde_i++; | ||
2132 | gpu_va = next; | ||
2133 | } | ||
2134 | |||
2135 | gk20a_dbg_fn("done"); | ||
2136 | |||
2137 | return 0; | ||
2138 | } | ||
2139 | |||
2140 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
2141 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
2142 | struct sg_table *sgt, | ||
2143 | u64 buffer_offset, | ||
2144 | u64 gpu_va, u64 gpu_end, | ||
2145 | u8 kind_v, u32 ctag_offset, | ||
2146 | bool cacheable, bool unmapped_pte, | ||
2147 | int rw_flag, | ||
2148 | bool sparse, | ||
2149 | bool priv, | ||
2150 | enum nvgpu_aperture aperture) | ||
2151 | { | ||
2152 | struct gk20a *g = gk20a_from_vm(vm); | ||
2153 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
2154 | u64 ctag = (u64)ctag_offset * (u64)ctag_granularity; | ||
2155 | u64 iova = 0; | ||
2156 | u64 space_to_skip = buffer_offset; | ||
2157 | u64 map_size = gpu_end - gpu_va; | ||
2158 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
2159 | int err; | ||
2160 | struct scatterlist *sgl = NULL; | ||
2161 | struct nvgpu_page_alloc *alloc = NULL; | ||
2162 | struct page_alloc_chunk *chunk = NULL; | ||
2163 | u64 length; | ||
2164 | |||
2165 | /* note: here we need to map kernel to small, since the | ||
2166 | * low-level mmu code assumes 0 is small and 1 is big pages */ | ||
2167 | if (pgsz_idx == gmmu_page_size_kernel) | ||
2168 | pgsz_idx = gmmu_page_size_small; | ||
2169 | |||
2170 | if (space_to_skip & (page_size - 1)) | ||
2171 | return -EINVAL; | ||
2172 | |||
2173 | err = map_gmmu_pages(g, &vm->pdb); | ||
2174 | if (err) { | ||
2175 | nvgpu_err(g, | ||
2176 | "couldn't map ptes for update as=%d", | ||
2177 | vm_aspace_id(vm)); | ||
2178 | return err; | ||
2179 | } | ||
2180 | |||
2181 | if (aperture == APERTURE_VIDMEM) { | ||
2182 | gk20a_dbg(gpu_dbg_map_v, "vidmem map size_idx=%d, gpu_va=[%llx,%llx], alloc=%llx", | ||
2183 | pgsz_idx, gpu_va, gpu_end-1, iova); | ||
2184 | |||
2185 | if (sgt) { | ||
2186 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
2187 | |||
2188 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | ||
2189 | page_alloc_chunk, list_entry) { | ||
2190 | if (space_to_skip && | ||
2191 | space_to_skip > chunk->length) { | ||
2192 | space_to_skip -= chunk->length; | ||
2193 | } else { | ||
2194 | iova = chunk->base + space_to_skip; | ||
2195 | length = chunk->length - space_to_skip; | ||
2196 | length = min(length, map_size); | ||
2197 | space_to_skip = 0; | ||
2198 | |||
2199 | err = update_gmmu_level_locked(vm, | ||
2200 | &vm->pdb, pgsz_idx, | ||
2201 | &sgl, | ||
2202 | &space_to_skip, | ||
2203 | &iova, | ||
2204 | gpu_va, gpu_va + length, | ||
2205 | kind_v, &ctag, | ||
2206 | cacheable, unmapped_pte, | ||
2207 | rw_flag, sparse, 0, priv, | ||
2208 | aperture); | ||
2209 | if (err) | ||
2210 | break; | ||
2211 | |||
2212 | /* need to set explicit zero here */ | ||
2213 | space_to_skip = 0; | ||
2214 | gpu_va += length; | ||
2215 | map_size -= length; | ||
2216 | |||
2217 | if (!map_size) | ||
2218 | break; | ||
2219 | } | ||
2220 | } | ||
2221 | } else { | ||
2222 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
2223 | &sgl, | ||
2224 | &space_to_skip, | ||
2225 | &iova, | ||
2226 | gpu_va, gpu_end, | ||
2227 | kind_v, &ctag, | ||
2228 | cacheable, unmapped_pte, rw_flag, | ||
2229 | sparse, 0, priv, | ||
2230 | aperture); | ||
2231 | } | ||
2232 | } else { | ||
2233 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d", | ||
2234 | pgsz_idx, | ||
2235 | sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0) | ||
2236 | : 0ULL, | ||
2237 | buffer_offset, | ||
2238 | sgt ? sgt->nents : 0); | ||
2239 | |||
2240 | gk20a_dbg(gpu_dbg_map_v, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", | ||
2241 | pgsz_idx, gpu_va, gpu_end-1, iova); | ||
2242 | |||
2243 | if (sgt) { | ||
2244 | iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, 0); | ||
2245 | if (!vm->mm->bypass_smmu && iova) { | ||
2246 | iova += space_to_skip; | ||
2247 | } else { | ||
2248 | sgl = sgt->sgl; | ||
2249 | |||
2250 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
2251 | (u64)sg_phys(sgl), | ||
2252 | sgl->length); | ||
2253 | |||
2254 | while (space_to_skip && sgl && | ||
2255 | space_to_skip + page_size > sgl->length) { | ||
2256 | space_to_skip -= sgl->length; | ||
2257 | sgl = sg_next(sgl); | ||
2258 | gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d", | ||
2259 | (u64)sg_phys(sgl), | ||
2260 | sgl->length); | ||
2261 | } | ||
2262 | |||
2263 | iova = sg_phys(sgl) + space_to_skip; | ||
2264 | } | ||
2265 | } | ||
2266 | |||
2267 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
2268 | &sgl, | ||
2269 | &space_to_skip, | ||
2270 | &iova, | ||
2271 | gpu_va, gpu_end, | ||
2272 | kind_v, &ctag, | ||
2273 | cacheable, unmapped_pte, rw_flag, | ||
2274 | sparse, 0, priv, | ||
2275 | aperture); | ||
2276 | } | ||
2277 | |||
2278 | unmap_gmmu_pages(g, &vm->pdb); | ||
2279 | |||
2280 | smp_mb(); | ||
2281 | |||
2282 | gk20a_dbg_fn("done"); | ||
2283 | |||
2284 | return err; | ||
2285 | } | ||
2286 | |||
2287 | /* NOTE! mapped_buffers lock must be held */ | 1782 | /* NOTE! mapped_buffers lock must be held */ |
2288 | void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, | 1783 | void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, |
2289 | struct vm_gk20a_mapping_batch *batch) | 1784 | struct vm_gk20a_mapping_batch *batch) |
@@ -2341,22 +1836,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer, | |||
2341 | return; | 1836 | return; |
2342 | } | 1837 | } |
2343 | 1838 | ||
2344 | void gk20a_vm_free_entries(struct vm_gk20a *vm, | ||
2345 | struct gk20a_mm_entry *parent, | ||
2346 | int level) | ||
2347 | { | ||
2348 | int i; | ||
2349 | |||
2350 | if (parent->entries) | ||
2351 | for (i = 0; i < parent->num_entries; i++) | ||
2352 | gk20a_vm_free_entries(vm, &parent->entries[i], level+1); | ||
2353 | |||
2354 | if (parent->mem.size) | ||
2355 | free_gmmu_pages(vm, parent); | ||
2356 | nvgpu_vfree(vm->mm->g, parent->entries); | ||
2357 | parent->entries = NULL; | ||
2358 | } | ||
2359 | |||
2360 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { | 1839 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { |
2361 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | 1840 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, |
2362 | .lo_bit = {26, 26}, | 1841 | .lo_bit = {26, 26}, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7e2ba051..2581bc0d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -433,17 +433,10 @@ int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | |||
433 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, | 433 | int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g, |
434 | u64 offset, struct gk20a_buffer_state **state); | 434 | u64 offset, struct gk20a_buffer_state **state); |
435 | 435 | ||
436 | int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry); | ||
437 | void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry); | ||
438 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, | 436 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, |
439 | u64 addr_lo, u64 addr_hi, | 437 | u64 addr_lo, u64 addr_hi, |
440 | u32 *pde_lo, u32 *pde_hi); | 438 | u32 *pde_lo, u32 *pde_hi); |
441 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); | 439 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); |
442 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
443 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); | ||
444 | void free_gmmu_pages(struct vm_gk20a *vm, | ||
445 | struct gk20a_mm_entry *entry); | ||
446 | |||
447 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); | 440 | u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); |
448 | 441 | ||
449 | struct gpu_ops; | 442 | struct gpu_ops; |