Diffstat (limited to 'drivers/gpu/nvgpu/common'):

 drivers/gpu/nvgpu/common/mm/gmmu.c     |  81
 drivers/gpu/nvgpu/common/mm/pd_cache.c | 426
 drivers/gpu/nvgpu/common/mm/vm.c       |  50

 3 files changed, 499 insertions, 58 deletions
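Note on context for the hunks below: they rely on struct nvgpu_gmmu_pd having grown a pointer to its backing nvgpu_mem plus a byte offset into it, while the struct itself lives in a header (presumably <nvgpu/gmmu.h>) outside the directory this diffstat is limited to. The following is only a sketch reconstructed from how the fields are used in this change, not the literal header contents; kernel integer types (u32, bool) are assumed.

    /*
     * Illustrative only -- reconstructed from usage in the hunks below.
     */
    struct nvgpu_mem;                       /* DMA buffer handle (opaque here) */

    struct nvgpu_gmmu_pd {
            struct nvgpu_mem *mem;          /* backing memory, possibly shared by several PDs */
            u32 mem_offs;                   /* byte offset of this PD within *mem             */
            bool cached;                    /* true if carved out of a pd_cache page          */
            struct nvgpu_gmmu_pd *entries;  /* child page directories                         */
            int num_entries;
    };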
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index ec1bc095..602dfb3b 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -45,7 +45,8 @@ static int pd_allocate(struct vm_gk20a *vm,
 		       struct nvgpu_gmmu_pd *pd,
 		       const struct gk20a_mmu_level *l,
 		       struct nvgpu_gmmu_attrs *attrs);
-
+static u32 pd_size(const struct gk20a_mmu_level *l,
+		   struct nvgpu_gmmu_attrs *attrs);
 /*
  * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
  * VA will be allocated for you. If addr is non-zero then the buffer will be
@@ -138,6 +139,9 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
 
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 {
+	u32 pdb_size;
+	int err;
+
 	/*
 	 * Need this just for page size. Everything else can be ignored. Also
 	 * note that we can just use pgsz 0 (i.e small pages) since the number
@@ -148,56 +152,43 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 		.pgsz = 0,
 	};
 
-	return pd_allocate(vm, &vm->pdb, &vm->mmu_levels[0], &attrs);
-}
+	/*
+	 * PDB size here must be one page so that its address is page size
+	 * aligned. Although lower PDE tables can be aligned at 256B boundaries
+	 * the main PDB must be page aligned.
+	 */
+	pdb_size = ALIGN(pd_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
+
+	err = __nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
+	if (WARN_ON(err))
+		return err;
 
+	/*
+	 * One mb() is done after all mapping operations. Don't need individual
+	 * barriers for each PD write.
+	 */
+	vm->pdb.mem->skip_wmb = true;
+
+	return 0;
+}
 
 /*
  * Ensure that there's a CPU mapping for the page directory memory. This won't
  * always be the case for 32 bit systems since we may need to save kernel
  * virtual memory.
  */
-static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
+static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
-	return nvgpu_mem_begin(g, &entry->mem);
+	return nvgpu_mem_begin(g, pd->mem);
 }
 
 /*
  * Handle any necessary CPU unmap semantics for a page directories DMA memory.
  * For 64 bit platforms this is a noop.
  */
-static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *entry)
+static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
-{
-	nvgpu_mem_end(g, &entry->mem);
-}
-
-static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 bytes,
-				  struct nvgpu_gmmu_pd *pd)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	unsigned long flags = NVGPU_DMA_FORCE_CONTIGUOUS;
-	int err;
-
-	/*
-	 * On arm32 vmalloc space is a precious commodity so we do not map pages
-	 * by default.
-	 */
-	if (!IS_ENABLED(CONFIG_ARM64))
-		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
-
-	err = nvgpu_dma_alloc_flags(g, flags, bytes, &pd->mem);
-	if (err)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
-			   struct nvgpu_gmmu_pd *pd)
 {
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	nvgpu_dma_free(g, &pd->mem);
+	nvgpu_mem_end(g, pd->mem);
 }
 
 /*
@@ -205,10 +196,14 @@ void nvgpu_free_gmmu_pages(struct vm_gk20a *vm,
  */
 u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
 {
+	u64 page_addr;
+
 	if (g->mm.has_physical_mode)
-		return sg_phys(pd->mem.priv.sgt->sgl);
+		page_addr = sg_phys(pd->mem->priv.sgt->sgl);
 	else
-		return nvgpu_mem_get_base_addr(g, &pd->mem, 0);
+		page_addr = nvgpu_mem_get_base_addr(g, pd->mem, 0);
+
+	return page_addr + pd->mem_offs;
 }
 
 /*
@@ -254,10 +249,10 @@ static int pd_allocate(struct vm_gk20a *vm,
 {
 	int err;
 
-	if (pd->mem.size)
+	if (pd->mem)
 		return 0;
 
-	err = nvgpu_alloc_gmmu_pages(vm, pd_size(l, attrs), pd);
+	err = __nvgpu_pd_alloc(vm, pd, pd_size(l, attrs));
 	if (err) {
 		nvgpu_info(vm->mm->g, "error allocating page directory!");
 		return err;
@@ -267,7 +262,7 @@ static int pd_allocate(struct vm_gk20a *vm,
 	 * One mb() is done after all mapping operations. Don't need individual
 	 * barriers for each PD write.
 	 */
-	pd->mem.skip_wmb = true;
+	pd->mem->skip_wmb = true;
 
 	return 0;
 }
@@ -778,7 +773,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	if (!batch)
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	else
 		batch->need_tlb_invalidate = true;
 
@@ -830,7 +825,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 
 	if (!batch) {
 		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	} else {
 		if (!batch->gpu_l2_flushed) {
 			gk20a_mm_l2_flush(g, true);
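The net effect of the gmmu.c changes above is that a page directory no longer owns a whole nvgpu_mem: it references (possibly shared) backing memory through pd->mem and locates itself within it via pd->mem_offs, which is why nvgpu_pde_phys_addr() now adds the offset and why the PDB is padded up to a full page before the direct allocation. A small, self-contained sketch of that arithmetic follows; the sizes and addresses are made up for illustration, and ALIGN/PAGE_SIZE stand in for the kernel macros.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define ALIGN(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

    int main(void)
    {
        /* Hypothetical top-level PD of 2 KiB: the PDB is still padded to a page. */
        uint32_t pd_bytes = 2048;
        uint32_t pdb_size = ALIGN(pd_bytes, PAGE_SIZE);

        /* A cached sub-page PD: base of the shared page plus its slot offset. */
        uint64_t mem_base = 0x100000;   /* pretend base address of the shared nvgpu_mem */
        uint32_t mem_offs = 3 * 256;    /* fourth 256-byte PD in that page              */

        printf("pdb_size = %u\n", pdb_size);                            /* 4096     */
        printf("pd addr  = 0x%llx\n",
               (unsigned long long)(mem_base + mem_offs));              /* 0x100300 */
        return 0;
    }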
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
new file mode 100644
index 00000000..4f312eff
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/log.h>
+#include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/list.h>
+#include <nvgpu/log2.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)
+
+/**
+ * DOC: PD cache
+ *
+ * In the name of saving memory with the many sub-page sized PD levels in Pascal
+ * and beyond, a way of packing PD tables together is necessary. This code here
+ * does just that. If a PD table only requires 1024 bytes, then it is possible
+ * to have 4 of these PDs in one page. This is even more pronounced for 256 byte
+ * PD tables.
+ *
+ * The pd cache is basically just a slab allocator. Each instance of the nvgpu
+ * driver makes one of these structs:
+ *
+ *   struct nvgpu_pd_cache {
+ *	struct nvgpu_list_node		 full[NVGPU_PD_CACHE_COUNT];
+ *	struct nvgpu_list_node		 partial[NVGPU_PD_CACHE_COUNT];
+ *
+ *	struct nvgpu_rbtree_node	*mem_tree;
+ *   };
+ *
+ * There are two sets of lists, the full and the partial. The full lists contain
+ * pages of memory for which all the memory in that page is in use. The partial
+ * lists contain partially full pages of memory which can be used for more PD
+ * allocations. There are a couple of assumptions here:
+ *
+ *   1. PDs greater than or equal to the page size bypass the pd cache.
+ *   2. PDs are always power of 2 and greater than %NVGPU_PD_CACHE_MIN bytes.
+ *
+ * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
+ * lists. For a 4Kb page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
+ * 256, 512, 1024, and 2048 byte PDs.
+ *
+ * __nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD
+ * size is page size or larger and choose the correct allocation scheme - either
+ * from the PD cache or directly. Similarly __nvgpu_pd_free() will free a PD
+ * allocated by __nvgpu_pd_alloc().
+ *
+ * Since the top level PD (the PDB) is a page aligned pointer but less than a
+ * page size the direct functions must be used for allocating PDBs. Otherwise
+ * there would be alignment issues for the PDBs when they get packed.
+ */
+
+static u32 nvgpu_pd_cache_nr(u32 bytes)
+{
+	return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1));
+}
+
+static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
+{
+	u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);
+
+	return mask_offset - 1;
+}
+
+int nvgpu_pd_cache_init(struct gk20a *g)
+{
+	struct nvgpu_pd_cache *cache;
+	int i;
+
+	/*
+	 * This gets called from finalize_poweron() so we need to make sure we
+	 * don't reinit the pd_cache over and over.
+	 */
+	if (g->mm.pd_cache)
+		return 0;
+
+	cache = nvgpu_kzalloc(g, sizeof(*cache));
+	if (!cache) {
+		nvgpu_err(g, "Failed to alloc pd_cache!");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
+		nvgpu_init_list_node(&cache->full[i]);
+		nvgpu_init_list_node(&cache->partial[i]);
+	}
+
+	cache->mem_tree = NULL;
+	g->mm.pd_cache = cache;
+	nvgpu_mutex_init(&cache->lock);
+
+	pd_dbg(g, "PD cache initialized!");
+
+	return 0;
+}
+
+void nvgpu_pd_cache_fini(struct gk20a *g)
+{
+	int i;
+	struct nvgpu_pd_cache *cache = g->mm.pd_cache;
+
+	if (!cache)
+		return;
+
+	for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
+		WARN_ON(!nvgpu_list_empty(&cache->full[i]));
+		WARN_ON(!nvgpu_list_empty(&cache->partial[i]));
+	}
+
+	nvgpu_kfree(g, g->mm.pd_cache);
+}
+
+/*
+ * This is the simple pass-through for greater than page or page sized PDs.
+ *
+ * Note: this does not need the cache lock since it does not modify any of the
+ * PD cache data structures.
+ */
+int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
+				  struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	int err;
+
+	pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);
+
+	pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
+	if (!pd->mem) {
+		pd_dbg(g, "OOM allocating nvgpu_mem struct!");
+		return -ENOMEM;
+	}
+
+	err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_FORCE_CONTIGUOUS,
+				    bytes, pd->mem);
+	if (err) {
+		pd_dbg(g, "OOM allocating page directory!");
+		nvgpu_kfree(g, pd->mem);
+		return -ENOMEM;
+	}
+
+	pd->cached = false;
+	pd->mem_offs = 0;
+
+	return 0;
+}
+
+/*
+ * Make a new nvgpu_pd_cache_entry and allocate a PD from it. Update the passed
+ * pd to reflect this allocation.
+ */
+static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
+				    struct nvgpu_pd_cache *cache,
+				    struct nvgpu_gmmu_pd *pd,
+				    u32 bytes)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+
+	pd_dbg(g, "PD-Alloc [C] New: offs=0");
+
+	pentry = nvgpu_kzalloc(g, sizeof(*pentry));
+	if (!pentry) {
+		pd_dbg(g, "OOM allocating pentry!");
+		return -ENOMEM;
+	}
+
+	if (nvgpu_dma_alloc_flags(g, NVGPU_DMA_FORCE_CONTIGUOUS,
+				  PAGE_SIZE, &pentry->mem)) {
+		nvgpu_kfree(g, pentry);
+		pd_dbg(g, "Unable to DMA alloc!");
+		return -ENOMEM;
+	}
+
+	pentry->pd_size = bytes;
+	nvgpu_list_add(&pentry->list_entry,
+		       &cache->partial[nvgpu_pd_cache_nr(bytes)]);
+
+	/*
+	 * This allocates the very first PD table in the set of tables in this
+	 * nvgpu_pd_mem_entry.
+	 */
+	pentry->alloc_map = 1;
+
+	/*
+	 * Now update the nvgpu_gmmu_pd to reflect this allocation.
+	 */
+	pd->mem = &pentry->mem;
+	pd->mem_offs = 0;
+	pd->cached = true;
+
+	pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
+	nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);
+
+	return 0;
+}
+
+static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
+					     struct nvgpu_pd_cache *cache,
+					     struct nvgpu_pd_mem_entry *pentry,
+					     struct nvgpu_gmmu_pd *pd)
+{
+	unsigned long bit_offs;
+	u32 mem_offs;
+	u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);
+
+	/*
+	 * Find and allocate an open PD.
+	 */
+	bit_offs = ffz(pentry->alloc_map);
+	mem_offs = bit_offs * pentry->pd_size;
+
+	/* Bit map full. Something's wrong. */
+	if (WARN_ON(bit_offs >= ffz(pentry_mask)))
+		return -ENOMEM;
+
+	pentry->alloc_map |= 1 << bit_offs;
+
+	pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);
+
+	/*
+	 * First update the pd.
+	 */
+	pd->mem = &pentry->mem;
+	pd->mem_offs = mem_offs;
+	pd->cached = true;
+
+	/*
+	 * Now make sure the pentry is in the correct list (full vs partial).
+	 */
+	if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
+		pd_dbg(g, "Adding pentry to full list!");
+		nvgpu_list_del(&pentry->list_entry);
+		nvgpu_list_add(&pentry->list_entry,
+			       &cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
+	}
+
+	return 0;
+}
+
+/*
+ * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there are no
+ * partial nvgpu_pd_mem_entry's.
+ */
+static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
+	struct nvgpu_pd_cache *cache, u32 bytes)
+{
+	struct nvgpu_list_node *list =
+		&cache->partial[nvgpu_pd_cache_nr(bytes)];
+
+	if (nvgpu_list_empty(list))
+		return NULL;
+
+	return nvgpu_list_first_entry(list,
+				      nvgpu_pd_mem_entry,
+				      list_entry);
+}
+
+/*
+ * Allocate memory from an nvgpu_mem for the page directory.
+ */
+static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
+				struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+	int err;
+
+	pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);
+
+	if (bytes & (bytes - 1) ||
+	    (bytes >= PAGE_SIZE ||
+	     bytes < NVGPU_PD_CACHE_MIN)) {
+		pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
+		return -EINVAL;
+	}
+
+	pentry = nvgpu_pd_cache_get_partial(cache, bytes);
+	if (!pentry)
+		err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
+	else
+		err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);
+
+	if (err)
+		pd_dbg(g, "PD-Alloc [C] Failed!");
+
+	return err;
+}
+
+/*
+ * Allocate the DMA memory for a page directory. This handles the necessary PD
+ * cache logistics. Since on Parker and later GPUs some of the page directories
+ * are smaller than a page, packing these PDs together saves a lot of memory.
+ */
+int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	int err;
+
+	/*
+	 * Simple case: PD is bigger than a page so just do a regular DMA
+	 * alloc.
+	 */
+	if (bytes >= PAGE_SIZE) {
+		err = __nvgpu_pd_cache_alloc_direct(g, pd, bytes);
+		if (err)
+			return err;
+
+		return 0;
+	}
+
+	if (WARN_ON(!g->mm.pd_cache))
+		return -ENOMEM;
+
+	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
+	err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
+	nvgpu_mutex_release(&g->mm.pd_cache->lock);
+
+	return err;
+}
+
+void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
+{
+	pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);
+
+	if (!pd->mem)
+		return;
+
+	nvgpu_dma_free(g, pd->mem);
+	nvgpu_kfree(g, pd->mem);
+	pd->mem = NULL;
+}
+
+static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
+					  struct nvgpu_pd_cache *cache,
+					  struct nvgpu_pd_mem_entry *pentry)
+{
+	nvgpu_dma_free(g, &pentry->mem);
+	nvgpu_list_del(&pentry->list_entry);
+	nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
+	nvgpu_kfree(g, pentry);
+}
+
+static void nvgpu_pd_cache_do_free(struct gk20a *g,
+				   struct nvgpu_pd_cache *cache,
+				   struct nvgpu_pd_mem_entry *pentry,
+				   struct nvgpu_gmmu_pd *pd)
+{
+	u32 index = pd->mem_offs / pentry->pd_size;
+	u32 bit = 1 << index;
+
+	/* Mark entry as free. */
+	pentry->alloc_map &= ~bit;
+
+	if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
+		/*
+		 * Partially full still. If it was already on the partial list
+		 * this just re-adds it.
+		 */
+		nvgpu_list_del(&pentry->list_entry);
+		nvgpu_list_add(&pentry->list_entry,
+			       &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
+	} else {
+		/* Empty now so free it. */
+		nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
+	}
+}
+
+static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
+	struct gk20a *g,
+	struct nvgpu_pd_cache *cache,
+	struct nvgpu_gmmu_pd *pd)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
+			    cache->mem_tree);
+	if (!node)
+		return NULL;
+
+	return nvgpu_pd_mem_entry_from_tree_entry(node);
+}
+
+static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
+				struct nvgpu_gmmu_pd *pd)
+{
+	struct nvgpu_pd_mem_entry *pentry;
+
+	pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);
+
+	pentry = nvgpu_pd_cache_look_up(g, cache, pd);
+	if (!pentry) {
+		WARN(1, "Attempting to free non-existent pd");
+		return;
+	}
+
+	nvgpu_pd_cache_do_free(g, cache, pentry, pd);
+}
+
+void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	/*
+	 * Simple case: just DMA free.
+	 */
+	if (!pd->cached)
+		return __nvgpu_pd_cache_free_direct(g, pd);
+
+	nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
+	nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
+	nvgpu_mutex_release(&g->mm.pd_cache->lock);
+}
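The cache bookkeeping above comes down to two small pieces of arithmetic: nvgpu_pd_cache_nr() maps a PD size to a size-class list index and nvgpu_pd_cache_get_mask() builds the "every slot in this page is taken" bitmask, with ffz() picking the next free slot. The standalone sketch below reproduces that logic with plain C. It assumes a 4 KiB page and a 256-byte minimum cached PD (the real NVGPU_PD_CACHE_MIN / NVGPU_PD_CACHE_MIN_SHIFT values live in the nvgpu headers), and it replaces the kernel's ffz() and ilog2() with GCC/Clang builtins.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE           4096u
    #define NVGPU_PD_CACHE_MIN  256u   /* assumed minimum cached PD size */

    /* Size-class index: 256 B -> 0, 512 B -> 1, 1024 B -> 2, 2048 B -> 3. */
    static unsigned int pd_cache_nr(uint32_t bytes)
    {
        return (unsigned int)__builtin_ctz(bytes / NVGPU_PD_CACHE_MIN);
    }

    /* Bitmask with one bit per PD slot that fits in a page. */
    static unsigned long pd_cache_full_mask(uint32_t pd_size)
    {
        return (1ul << (PAGE_SIZE / pd_size)) - 1ul;
    }

    /* Portable stand-in for the kernel's ffz(): first zero bit in the map. */
    static unsigned int first_free_slot(unsigned long alloc_map)
    {
        return (unsigned int)__builtin_ctzl(~alloc_map);
    }

    int main(void)
    {
        uint32_t pd_size = 1024;        /* one of the sub-page PD sizes */
        unsigned long alloc_map = 0;
        unsigned long full = pd_cache_full_mask(pd_size);
        unsigned int i;

        printf("list index = %u, full mask = 0x%lx\n",
               pd_cache_nr(pd_size), full);

        /* Carve four 1 KiB PDs out of one 4 KiB page, as the partial list does. */
        for (i = 0; i < PAGE_SIZE / pd_size; i++) {
            unsigned int slot = first_free_slot(alloc_map);

            alloc_map |= 1ul << slot;
            printf("slot %u -> mem_offs %u, map now 0x%lx%s\n",
                   slot, slot * pd_size, alloc_map,
                   (alloc_map & full) == full ? " (page is now full)" : "");
        }
        return 0;
    }

Once a page's map equals the full mask the pentry migrates from the partial list to the full list, and freeing a slot moves it back (or releases the page entirely when the map reaches zero), which is exactly the list shuffling visible in nvgpu_pd_cache_alloc_from_partial() and nvgpu_pd_cache_do_free() above.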
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3aeba500..3ed3c7fe 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -35,21 +35,42 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
-static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
-				  struct nvgpu_gmmu_pd *parent,
+static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				    struct nvgpu_gmmu_pd *pd,
 				    int level)
 {
 	int i;
 
-	if (parent->entries)
-		for (i = 0; i < parent->num_entries; i++)
-			nvgpu_vm_free_entries(vm, &parent->entries[i],
+	if (pd->mem) {
+		__nvgpu_pd_free(vm, pd);
+		pd->mem = NULL;
+	}
+
+	if (pd->entries) {
+		for (i = 0; i < pd->num_entries; i++)
+			__nvgpu_vm_free_entries(vm, &pd->entries[i],
 						level + 1);
+		nvgpu_vfree(vm->mm->g, pd->entries);
+		pd->entries = NULL;
+	}
+}
+
+static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
+				  struct nvgpu_gmmu_pd *pdb)
+{
+	struct gk20a *g = vm->mm->g;
+	int i;
+
+	__nvgpu_pd_cache_free_direct(g, pdb);
+
+	if (!pdb->entries)
+		return;
+
+	for (i = 0; i < pdb->num_entries; i++)
+		__nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
 
-	if (parent->mem.size)
-		nvgpu_free_gmmu_pages(vm, parent);
-	nvgpu_vfree(vm->mm->g, parent->entries);
-	parent->entries = NULL;
+	nvgpu_vfree(g, pdb->entries);
+	pdb->entries = NULL;
 }
 
 u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
@@ -110,7 +131,7 @@ void nvgpu_vm_mapping_batch_finish_locked(
 
 	if (mapping_batch->need_tlb_invalidate) {
 		struct gk20a *g = gk20a_from_vm(vm);
-		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
 	}
 }
 
@@ -407,9 +428,8 @@ clean_up_allocators:
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 clean_up_page_tables:
-	/* Cleans up nvgpu_vm_init_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	nvgpu_free_gmmu_pages(vm, &vm->pdb);
+	/* Cleans up nvgpu_gmmu_init_page_table() */
+	__nvgpu_pd_cache_free_direct(g, &vm->pdb);
clean_up_vgpu_vm:
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -525,7 +545,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	if (nvgpu_alloc_initialized(&vm->user_lp))
 		nvgpu_alloc_destroy(&vm->user_lp);
 
-	nvgpu_vm_free_entries(vm, &vm->pdb, 0);
+	nvgpu_vm_free_entries(vm, &vm->pdb);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
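Finally, the vm.c rework splits teardown into a recursive helper (__nvgpu_vm_free_entries) for the lower-level directories and a top-level nvgpu_vm_free_entries() that hands the PDB back through the direct-free path, since the PDB was allocated directly rather than from the cache. The walk order it enforces, release a directory's backing memory first, then recurse into its children, then drop the child array, can be sketched with toy types and plain malloc/free; this is only an illustration of the recursion shape, not the driver code.

    #include <stdlib.h>
    #include <stdio.h>

    /* Toy stand-ins: just enough structure to show the free walk. */
    struct toy_pd {
        void *mem;               /* backing allocation for this directory */
        struct toy_pd *entries;  /* child directories, if any             */
        int num_entries;
    };

    /* Mirrors the shape of __nvgpu_vm_free_entries(): free this PD's memory,
     * recurse into the children, then free the child array itself. */
    static void toy_free_entries(struct toy_pd *pd, int level)
    {
        int i;

        if (pd->mem) {
            printf("level %d: freeing PD memory\n", level);
            free(pd->mem);
            pd->mem = NULL;
        }

        if (pd->entries) {
            for (i = 0; i < pd->num_entries; i++)
                toy_free_entries(&pd->entries[i], level + 1);
            free(pd->entries);
            pd->entries = NULL;
        }
    }

    int main(void)
    {
        /* A two-level toy hierarchy: one "PDB" with two children. */
        struct toy_pd pdb = { .mem = malloc(64), .num_entries = 2 };
        int i;

        pdb.entries = calloc(pdb.num_entries, sizeof(*pdb.entries));
        for (i = 0; i < pdb.num_entries; i++)
            pdb.entries[i].mem = malloc(64);

        toy_free_entries(&pdb, 0);
        return 0;
    }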