author     Alex Waterman <alexw@nvidia.com>                          2017-05-01 19:12:16 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>       2017-05-26 06:33:57 -0400
commit     fbafc7eba41ba7654dfdadf51a53acf1638e9fa1 (patch)
tree       7457efcb4c9fb9e1f2121267d980f2c380c7f0f6  /drivers/gpu/nvgpu/common/mm/vm.c
parent     f76febb962e1681690dd378884f51770e7271820 (diff)
gpu: nvgpu: Refactor VM init/cleanup
Refactor the API for initializing and cleaning up VMs. This also
involved moving a bunch of GMMU code out into the gmmu code since
part of initializing a VM involves initializing the page tables
for the VM.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I4710f08c26a6e39806f0762a35f6db5c94b64c50
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477746
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/vm.c')
-rw-r--r--   drivers/gpu/nvgpu/common/mm/vm.c   344
1 file changed, 339 insertions(+), 5 deletions(-)
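
Before the diff itself, a minimal caller sketch (not part of this patch) may help show the shape of the refactored API. The function names and the parameter order are taken from the diff below; the concrete sizes, the example_setup_vm() name, and the assumption that the declarations are reachable through <nvgpu/vm.h> are illustrative only.

/*
 * Hypothetical caller of the refactored API (illustrative sketch only).
 * Sizes below are made up; they are not values used by this patch.
 */
#include <nvgpu/vm.h>   /* assumed to declare nvgpu_init_vm() and nvgpu_vm_put() */

static int example_setup_vm(struct mm_gk20a *mm, struct vm_gk20a *vm)
{
	int err;

	err = nvgpu_init_vm(mm, vm,
			    SZ_128K,          /* big_page_size */
			    SZ_64M,           /* low_hole */
			    SZ_1G,            /* kernel_reserved */
			    128ULL * SZ_1G,   /* aperture_size */
			    true,             /* big_pages (recomputed internally) */
			    false,            /* userspace_managed */
			    "example");
	if (err)
		return err;

	/* ... use the VM ... */

	nvgpu_vm_put(vm);   /* drop the initial reference taken by kref_init() */
	return 0;
}

Note that the big_pages argument is effectively a hint: as the doc comment in the diff states, nvgpu_init_vm() recomputes it from the alignment of the user VMA via nvgpu_big_pages_possible().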
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3b3b7a10..e42c7c5a 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -14,6 +14,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/lock.h>
@@ -23,6 +25,7 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+#include "gk20a/platform_gk20a.h"
 
 int vm_aspace_id(struct vm_gk20a *vm)
 {
@@ -104,6 +107,341 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
 
+static int nvgpu_vm_init_page_tables(struct vm_gk20a *vm)
+{
+	u32 pde_lo, pde_hi;
+	int err;
+
+	pde_range_from_vaddr_range(vm,
+				   0, vm->va_limit - 1,
+				   &pde_lo, &pde_hi);
+	vm->pdb.entries = nvgpu_vzalloc(vm->mm->g,
+					sizeof(struct gk20a_mm_entry) *
+					(pde_hi + 1));
+	vm->pdb.num_entries = pde_hi + 1;
+
+	if (!vm->pdb.entries)
+		return -ENOMEM;
+
+	err = nvgpu_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
+					   &vm->pdb, NULL);
+	if (err) {
+		nvgpu_vfree(vm->mm->g, vm->pdb.entries);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+	if (base & mask || size & mask)
+		return 0;
+	return 1;
+}
+
+/*
+ * Initialize a semaphore pool. Just return successfully if we do not need
+ * semaphores (i.e. when sync-pts are active).
+ */
+static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
+{
+	struct nvgpu_semaphore_sea *sema_sea;
+	struct mm_gk20a *mm = vm->mm;
+	struct gk20a *g = mm->g;
+	int err;
+
+	/*
+	 * Don't waste the memory on semaphores if we don't need them.
+	 */
+	if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
+		return 0;
+
+	if (vm->sema_pool)
+		return 0;
+
+	sema_sea = nvgpu_semaphore_sea_create(g);
+	if (!sema_sea)
+		return -ENOMEM;
+
+	vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
+	if (!vm->sema_pool)
+		return -ENOMEM;
+
+	/*
+	 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
+	 * do a fixed alloc in the kernel VM so that all channels have the same
+	 * RO address range for the semaphores.
+	 *
+	 * !!! TODO: cleanup.
+	 */
+	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
+					     vm->va_limit -
+					     mm->channel.kernel_size,
+					     512 * PAGE_SIZE,
+					     SZ_4K);
+	if (!sema_sea->gpu_va) {
+		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
+		nvgpu_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
+	if (err) {
+		nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
+		nvgpu_free(vm->vma[gmmu_page_size_small],
+			   vm->sema_pool->gpu_va);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * nvgpu_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
+int nvgpu_init_vm(struct mm_gk20a *mm,
+		  struct vm_gk20a *vm,
+		  u32 big_page_size,
+		  u64 low_hole,
+		  u64 kernel_reserved,
+		  u64 aperture_size,
+		  bool big_pages,
+		  bool userspace_managed,
+		  char *name)
+{
+	int err;
+	char alloc_name[32];
+	u64 kernel_vma_flags;
+	u64 user_vma_start, user_vma_limit;
+	u64 user_lp_vma_start, user_lp_vma_limit;
+	u64 kernel_vma_start, kernel_vma_limit;
+	struct gk20a *g = mm->g;
+	struct gk20a_platform *p = gk20a_get_platform(g->dev);
+
+	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
+		return -ENOMEM;
+
+	nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
+		       "LP size=0x%x lowhole=0x%llx",
+		       name, aperture_size,
+		       (unsigned int)big_page_size, low_hole);
+
+	vm->mm = mm;
+
+	vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
+	vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
+	vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
+
+	/* Set up vma pointers. */
+	vm->vma[gmmu_page_size_small] = &vm->user;
+	vm->vma[gmmu_page_size_big] = &vm->user;
+	vm->vma[gmmu_page_size_kernel] = &vm->kernel;
+	if (!p->unify_address_spaces)
+		vm->vma[gmmu_page_size_big] = &vm->user_lp;
+
+	vm->va_start = low_hole;
+	vm->va_limit = aperture_size;
+	vm->big_pages = big_pages;
+
+	vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
+	vm->userspace_managed = userspace_managed;
+	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
+
+	/* Initialize the page table data structures. */
+	err = nvgpu_vm_init_page_tables(vm);
+	if (err)
+		return err;
+
+	/* Setup vma limits. */
+	if (kernel_reserved + low_hole < aperture_size) {
+		if (p->unify_address_spaces) {
+			user_vma_start = low_hole;
+			user_vma_limit = vm->va_limit - kernel_reserved;
+			user_lp_vma_start = user_vma_limit;
+			user_lp_vma_limit = user_vma_limit;
+		} else {
+			user_vma_start = low_hole;
+			user_vma_limit = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_start = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_limit = vm->va_limit - kernel_reserved;
+		}
+	} else {
+		user_vma_start = 0;
+		user_vma_limit = 0;
+		user_lp_vma_start = 0;
+		user_lp_vma_limit = 0;
+	}
+	kernel_vma_start = vm->va_limit - kernel_reserved;
+	kernel_vma_limit = vm->va_limit;
+
+	nvgpu_log_info(g, "user_vma     [0x%llx,0x%llx)",
+		       user_vma_start, user_vma_limit);
+	nvgpu_log_info(g, "user_lp_vma  [0x%llx,0x%llx)",
+		       user_lp_vma_start, user_lp_vma_limit);
+	nvgpu_log_info(g, "kernel_vma   [0x%llx,0x%llx)",
+		       kernel_vma_start, kernel_vma_limit);
+
+	if (WARN_ON(user_vma_start > user_vma_limit) ||
+	    WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
+	    WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
+		err = -EINVAL;
+		goto clean_up_page_tables;
+	}
+
+	kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
+		0 : GPU_ALLOC_GVA_SPACE;
+
+	/*
+	 * A "user" area only makes sense for the GVA spaces. For VMs where
+	 * there is no "user" area user_vma_start will be equal to
+	 * user_vma_limit (i.e. a 0 sized space). In such a situation the
+	 * kernel area must be non-zero in length.
+	 */
+	if (user_vma_start >= user_vma_limit &&
+	    kernel_vma_start >= kernel_vma_limit) {
+		err = -EINVAL;
+		goto clean_up_page_tables;
+	}
+
+	/*
+	 * Determine if big pages are possible in this VM. If a split address
+	 * space is used then check the user_lp vma instead of the user vma.
+	 */
+	if (p->unify_address_spaces)
+		vm->big_pages = nvgpu_big_pages_possible(vm, user_vma_start,
+					user_vma_limit - user_vma_start);
+	else
+		vm->big_pages = nvgpu_big_pages_possible(vm, user_lp_vma_start,
+					user_lp_vma_limit - user_lp_vma_start);
+
+	/*
+	 * User VMA.
+	 */
+	if (user_vma_start < user_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+		err = __nvgpu_buddy_allocator_init(g, &vm->user,
+						   vm, alloc_name,
+						   user_vma_start,
+						   user_vma_limit -
+						   user_vma_start,
+						   SZ_4K,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_page_tables;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
+	}
+
+	/*
+	 * User VMA for large pages when a split address range is used.
+	 */
+	if (user_lp_vma_start < user_lp_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
+		err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
+						   vm, alloc_name,
+						   user_lp_vma_start,
+						   user_lp_vma_limit -
+						   user_lp_vma_start,
+						   vm->big_page_size,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_allocators;
+	}
+
+	/*
+	 * Kernel VMA. Must always exist for an address space.
+	 */
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
+					   vm, alloc_name,
+					   kernel_vma_start,
+					   kernel_vma_limit - kernel_vma_start,
+					   SZ_4K,
+					   GPU_BALLOC_MAX_ORDER,
+					   kernel_vma_flags);
+	if (err)
+		goto clean_up_allocators;
+
+	vm->mapped_buffers = NULL;
+
+	nvgpu_mutex_init(&vm->update_gmmu_lock);
+	kref_init(&vm->ref);
+	nvgpu_init_list_node(&vm->vm_area_list);
+
+	/*
+	 * This is only necessary for channel address spaces. The best way to
+	 * distinguish channel address spaces from other address spaces is by
+	 * size - if the address space is 4GB or less, it's not a channel.
+	 */
+	if (vm->va_limit > SZ_4G) {
+		err = nvgpu_init_sema_pool(vm);
+		if (err)
+			goto clean_up_allocators;
+	}
+
+	return 0;
+
+clean_up_allocators:
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
+clean_up_page_tables:
+	/* Cleans up nvgpu_vm_init_page_tables() */
+	nvgpu_vfree(g, vm->pdb.entries);
+	free_gmmu_pages(vm, &vm->pdb);
+	return err;
+}
+
 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
 	struct nvgpu_mapped_buf *mapped_buffer;
@@ -111,8 +449,6 @@ void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;
 
-	gk20a_dbg_fn("");
-
 	/*
 	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
 	 * pool involves unmapping a GMMU mapping which means acquiring the
@@ -172,12 +508,10 @@ void nvgpu_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
 }
 
-void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
 {
 	struct gk20a *g = vm->mm->g;
 
-	gk20a_dbg_fn("");
-
 	gk20a_free_inst_block(g, inst_block);
 	nvgpu_vm_remove_support_nofree(vm);
 }