path: root/drivers/gpu/drm/radeon/radeon_vm.c
author    Christian König <christian.koenig@amd.com>    2014-02-20 04:25:15 -0500
committer Christian König <christian.koenig@amd.com>    2014-03-03 05:03:34 -0500
commit    2280ab57b6edc8581497d5e101c4694faf839c3e
tree      f4caf45e1a29760d9bbde12340ca4608f491b9da /drivers/gpu/drm/radeon/radeon_vm.c
parent    b03b4e4b6eb0563f2dc83c482b57b90b637ab81c
drm/radeon: separate gart and vm functions
Both are complex enough on their own.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_vm.c')
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vm.c  981
1 file changed, 981 insertions, 0 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
new file mode 100644
index 000000000000..433b1ebd07ea
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -0,0 +1,981 @@
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28#include <drm/drmP.h>
29#include <drm/radeon_drm.h>
30#include "radeon.h"
31#include "radeon_trace.h"
32
33/*
34 * GPUVM
35 * GPUVM is similar to the legacy gart on older asics, however
36 * rather than there being a single global gart table
37 * for the entire GPU, there are multiple VM page tables active
38 * at any given time. The VM page tables can contain a mix of
39 * vram pages and system memory pages. System memory pages
40 * can be mapped as snooped (cached system pages) or unsnooped
41 * (uncached system pages).
42 * Each VM has an ID associated with it and there is a page table
43 * associated with each VMID. When executing a command buffer,
44 * the kernel tells the ring what VMID to use for that command
45 * buffer. VMIDs are allocated dynamically as commands are submitted.
46 * The userspace drivers maintain their own address space and the kernel
47 * sets up their page tables accordingly when they submit their
48 * command buffers and a VMID is assigned.
49 * Cayman/Trinity support up to 8 active VMs at any given time;
50 * SI supports 16.
51 */
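
For orientation, a minimal sketch (assuming RADEON_GPU_PAGE_SIZE is 4096 and RADEON_VM_BLOCK_SIZE is 9, so RADEON_VM_PTE_COUNT is 512) of how the two-level walk in radeon_vm_update_pdes() and radeon_vm_update_ptes() below splits a GPU virtual address:

    /* illustrative only: how a va is broken into directory and table indices */
    uint64_t pfn    = gpu_addr / RADEON_GPU_PAGE_SIZE;   /* GPU page frame number */
    uint64_t pd_idx = pfn >> RADEON_VM_BLOCK_SIZE;       /* selects one page table via its PDE */
    uint64_t pt_idx = pfn & (RADEON_VM_PTE_COUNT - 1);   /* selects the PTE inside that table */
    /* the PTE then supplies the physical page address plus the R600_PTE_* flags */

With those values each page table covers 512 pages (2 MiB of address space), and the page directory needs max_pfn >> RADEON_VM_BLOCK_SIZE entries, which is what radeon_vm_num_pdes() returns.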
52
53/**
54 * radeon_vm_num_pdes - return the number of page directory entries
55 *
56 * @rdev: radeon_device pointer
57 *
58 * Calculate the number of page directory entries (cayman+).
59 */
60static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
61{
62 return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
63}
64
65/**
66 * radeon_vm_directory_size - returns the size of the page directory in bytes
67 *
68 * @rdev: radeon_device pointer
69 *
70 * Calculate the size of the page directory in bytes (cayman+).
71 */
72static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
73{
74 return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
75}
76
77/**
78 * radeon_vm_manager_init - init the vm manager
79 *
80 * @rdev: radeon_device pointer
81 *
82 * Init the vm manager (cayman+).
83 * Returns 0 for success, error for failure.
84 */
85int radeon_vm_manager_init(struct radeon_device *rdev)
86{
87 struct radeon_vm *vm;
88 struct radeon_bo_va *bo_va;
89 int r;
90 unsigned size;
91
92 if (!rdev->vm_manager.enabled) {
93 /* allocate enough for 2 full VM pts */
94 size = radeon_vm_directory_size(rdev);
95 size += rdev->vm_manager.max_pfn * 8;
96 size *= 2;
97 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
98 RADEON_GPU_PAGE_ALIGN(size),
99 RADEON_VM_PTB_ALIGN_SIZE,
100 RADEON_GEM_DOMAIN_VRAM);
101 if (r) {
102 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
103 (rdev->vm_manager.max_pfn * 8) >> 10);
104 return r;
105 }
106
107 r = radeon_asic_vm_init(rdev);
108 if (r)
109 return r;
110
111 rdev->vm_manager.enabled = true;
112
113 r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
114 if (r)
115 return r;
116 }
117
118 /* restore page table */
119 list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
120 if (vm->page_directory == NULL)
121 continue;
122
123 list_for_each_entry(bo_va, &vm->va, vm_list) {
124 bo_va->valid = false;
125 }
126 }
127 return 0;
128}
129
130/**
131 * radeon_vm_free_pt - free the page table for a specific vm
132 *
133 * @rdev: radeon_device pointer
134 * @vm: vm to unbind
135 *
136 * Free the page table of a specific vm (cayman+).
137 *
138 * Global and local mutex must be locked!
139 */
140static void radeon_vm_free_pt(struct radeon_device *rdev,
141 struct radeon_vm *vm)
142{
143 struct radeon_bo_va *bo_va;
144 int i;
145
146 if (!vm->page_directory)
147 return;
148
149 list_del_init(&vm->list);
150 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
151
152 list_for_each_entry(bo_va, &vm->va, vm_list) {
153 bo_va->valid = false;
154 }
155
156 if (vm->page_tables == NULL)
157 return;
158
159 for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
160 radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
161
162 kfree(vm->page_tables);
163}
164
165/**
166 * radeon_vm_manager_fini - tear down the vm manager
167 *
168 * @rdev: radeon_device pointer
169 *
170 * Tear down the VM manager (cayman+).
171 */
172void radeon_vm_manager_fini(struct radeon_device *rdev)
173{
174 struct radeon_vm *vm, *tmp;
175 int i;
176
177 if (!rdev->vm_manager.enabled)
178 return;
179
180 mutex_lock(&rdev->vm_manager.lock);
181 /* free all allocated page tables */
182 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
183 mutex_lock(&vm->mutex);
184 radeon_vm_free_pt(rdev, vm);
185 mutex_unlock(&vm->mutex);
186 }
187 for (i = 0; i < RADEON_NUM_VM; ++i) {
188 radeon_fence_unref(&rdev->vm_manager.active[i]);
189 }
190 radeon_asic_vm_fini(rdev);
191 mutex_unlock(&rdev->vm_manager.lock);
192
193 radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
194 radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
195 rdev->vm_manager.enabled = false;
196}
197
198/**
199 * radeon_vm_evict - evict page table to make room for new one
200 *
201 * @rdev: radeon_device pointer
202 * @vm: VM we want to allocate something for
203 *
204 * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
205 * Returns 0 for success, -ENOMEM for failure.
206 *
207 * Global and local mutex must be locked!
208 */
209static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
210{
211 struct radeon_vm *vm_evict;
212
213 if (list_empty(&rdev->vm_manager.lru_vm))
214 return -ENOMEM;
215
216 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
217 struct radeon_vm, list);
218 if (vm_evict == vm)
219 return -ENOMEM;
220
221 mutex_lock(&vm_evict->mutex);
222 radeon_vm_free_pt(rdev, vm_evict);
223 mutex_unlock(&vm_evict->mutex);
224 return 0;
225}
226
227/**
228 * radeon_vm_alloc_pt - allocates a page table for a VM
229 *
230 * @rdev: radeon_device pointer
231 * @vm: vm to bind
232 *
233 * Allocate a page table for the requested vm (cayman+).
234 * Returns 0 for success, error for failure.
235 *
236 * Global and local mutex must be locked!
237 */
238int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
239{
240 unsigned pd_size, pd_entries, pts_size;
241 struct radeon_ib ib;
242 int r;
243
244 if (vm == NULL) {
245 return -EINVAL;
246 }
247
248 if (vm->page_directory != NULL) {
249 return 0;
250 }
251
252 pd_size = radeon_vm_directory_size(rdev);
253 pd_entries = radeon_vm_num_pdes(rdev);
254
255retry:
256 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
257 &vm->page_directory, pd_size,
258 RADEON_VM_PTB_ALIGN_SIZE, false);
259 if (r == -ENOMEM) {
260 r = radeon_vm_evict(rdev, vm);
261 if (r)
262 return r;
263 goto retry;
264
265 } else if (r) {
266 return r;
267 }
268
269 vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
270
271 /* Initially clear the page directory */
272 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
273 NULL, pd_entries * 2 + 64);
274 if (r) {
275 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
276 return r;
277 }
278
279 ib.length_dw = 0;
280
281 radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
282 0, pd_entries, 0, 0);
283
284 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
285 r = radeon_ib_schedule(rdev, &ib, NULL);
286 if (r) {
287 radeon_ib_free(rdev, &ib);
288 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
289 return r;
290 }
291 radeon_fence_unref(&vm->fence);
292 vm->fence = radeon_fence_ref(ib.fence);
293 radeon_ib_free(rdev, &ib);
294 radeon_fence_unref(&vm->last_flush);
295
296 /* allocate page table array */
297 pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
298 vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
299
300 if (vm->page_tables == NULL) {
301 DRM_ERROR("Cannot allocate memory for page table array\n");
302 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
303 return -ENOMEM;
304 }
305
306 return 0;
307}
308
309/**
310 * radeon_vm_add_to_lru - add VMs page table to LRU list
311 *
312 * @rdev: radeon_device pointer
313 * @vm: vm to add to LRU
314 *
315 * Add the allocated page table to the LRU list (cayman+).
316 *
317 * Global mutex must be locked!
318 */
319void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
320{
321 list_del_init(&vm->list);
322 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
323}
324
325/**
326 * radeon_vm_grab_id - allocate the next free VMID
327 *
328 * @rdev: radeon_device pointer
329 * @vm: vm to allocate id for
330 * @ring: ring we want to submit job to
331 *
332 * Allocate an id for the vm (cayman+).
333 * Returns the fence we need to sync to (if any).
334 *
335 * Global and local mutex must be locked!
336 */
337struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
338 struct radeon_vm *vm, int ring)
339{
340 struct radeon_fence *best[RADEON_NUM_RINGS] = {};
341 unsigned choices[2] = {};
342 unsigned i;
343
344 /* check if the id is still valid */
345 if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
346 return NULL;
347
348 /* we definitely need to flush */
349 radeon_fence_unref(&vm->last_flush);
350
351 /* skip over VMID 0, since it is the system VM */
352 for (i = 1; i < rdev->vm_manager.nvm; ++i) {
353 struct radeon_fence *fence = rdev->vm_manager.active[i];
354
355 if (fence == NULL) {
356 /* found a free one */
357 vm->id = i;
358 trace_radeon_vm_grab_id(vm->id, ring);
359 return NULL;
360 }
361
362 if (radeon_fence_is_earlier(fence, best[fence->ring])) {
363 best[fence->ring] = fence;
364 choices[fence->ring == ring ? 0 : 1] = i;
365 }
366 }
367
368 for (i = 0; i < 2; ++i) {
369 if (choices[i]) {
370 vm->id = choices[i];
371 trace_radeon_vm_grab_id(vm->id, ring);
372 return rdev->vm_manager.active[choices[i]];
373 }
374 }
375
376 /* should never happen */
377 BUG();
378 return NULL;
379}
380
381/**
382 * radeon_vm_fence - remember fence for vm
383 *
384 * @rdev: radeon_device pointer
385 * @vm: vm we want to fence
386 * @fence: fence to remember
387 *
388 * Fence the vm (cayman+).
389 * Set the fence used to protect page table and id.
390 *
391 * Global and local mutex must be locked!
392 */
393void radeon_vm_fence(struct radeon_device *rdev,
394 struct radeon_vm *vm,
395 struct radeon_fence *fence)
396{
397 radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
398 rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
399
400 radeon_fence_unref(&vm->fence);
401 vm->fence = radeon_fence_ref(fence);
402
403 radeon_fence_unref(&vm->last_id_use);
404 vm->last_id_use = radeon_fence_ref(fence);
405}
406
407/**
408 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
409 *
410 * @vm: requested vm
411 * @bo: requested buffer object
412 *
413 * Find @bo inside the requested vm (cayman+).
414 * Search inside the @bo's vm list for the requested vm
415 * Returns the found bo_va or NULL if none is found
416 *
417 * Object has to be reserved!
418 */
419struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
420 struct radeon_bo *bo)
421{
422 struct radeon_bo_va *bo_va;
423
424 list_for_each_entry(bo_va, &bo->va, bo_list) {
425 if (bo_va->vm == vm) {
426 return bo_va;
427 }
428 }
429 return NULL;
430}
431
432/**
433 * radeon_vm_bo_add - add a bo to a specific vm
434 *
435 * @rdev: radeon_device pointer
436 * @vm: requested vm
437 * @bo: radeon buffer object
438 *
439 * Add @bo into the requested vm (cayman+).
440 * Add @bo to the list of bos associated with the vm
441 * Returns newly added bo_va or NULL for failure
442 *
443 * Object has to be reserved!
444 */
445struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
446 struct radeon_vm *vm,
447 struct radeon_bo *bo)
448{
449 struct radeon_bo_va *bo_va;
450
451 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
452 if (bo_va == NULL) {
453 return NULL;
454 }
455 bo_va->vm = vm;
456 bo_va->bo = bo;
457 bo_va->soffset = 0;
458 bo_va->eoffset = 0;
459 bo_va->flags = 0;
460 bo_va->valid = false;
461 bo_va->ref_count = 1;
462 INIT_LIST_HEAD(&bo_va->bo_list);
463 INIT_LIST_HEAD(&bo_va->vm_list);
464
465 mutex_lock(&vm->mutex);
466 list_add(&bo_va->vm_list, &vm->va);
467 list_add_tail(&bo_va->bo_list, &bo->va);
468 mutex_unlock(&vm->mutex);
469
470 return bo_va;
471}
472
473/**
474 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
475 *
476 * @rdev: radeon_device pointer
477 * @bo_va: bo_va to store the address
478 * @soffset: requested offset of the buffer in the VM address space
479 * @flags: attributes of pages (read/write/valid/etc.)
480 *
481 * Set offset of @bo_va (cayman+).
482 * Validate and set the offset requested within the vm address space.
483 * Returns 0 for success, error for failure.
484 *
485 * Object has to be reserved!
486 */
487int radeon_vm_bo_set_addr(struct radeon_device *rdev,
488 struct radeon_bo_va *bo_va,
489 uint64_t soffset,
490 uint32_t flags)
491{
492 uint64_t size = radeon_bo_size(bo_va->bo);
493 uint64_t eoffset, last_offset = 0;
494 struct radeon_vm *vm = bo_va->vm;
495 struct radeon_bo_va *tmp;
496 struct list_head *head;
497 unsigned last_pfn;
498
499 if (soffset) {
500 /* make sure object fit at this offset */
501 eoffset = soffset + size;
502 if (soffset >= eoffset) {
503 return -EINVAL;
504 }
505
506 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
507 if (last_pfn > rdev->vm_manager.max_pfn) {
508 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
509 last_pfn, rdev->vm_manager.max_pfn);
510 return -EINVAL;
511 }
512
513 } else {
514 eoffset = last_pfn = 0;
515 }
516
517 mutex_lock(&vm->mutex);
518 head = &vm->va;
519 last_offset = 0;
520 list_for_each_entry(tmp, &vm->va, vm_list) {
521 if (bo_va == tmp) {
522 /* skip over currently modified bo */
523 continue;
524 }
525
526 if (soffset >= last_offset && eoffset <= tmp->soffset) {
527 /* bo can be added before this one */
528 break;
529 }
530 if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
531 /* bo and tmp overlap, invalid offset */
532 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
533 bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
534 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
535 mutex_unlock(&vm->mutex);
536 return -EINVAL;
537 }
538 last_offset = tmp->eoffset;
539 head = &tmp->vm_list;
540 }
541
542 bo_va->soffset = soffset;
543 bo_va->eoffset = eoffset;
544 bo_va->flags = flags;
545 bo_va->valid = false;
546 list_move(&bo_va->vm_list, head);
547
548 mutex_unlock(&vm->mutex);
549 return 0;
550}
551
552/**
553 * radeon_vm_map_gart - get the physical address of a gart page
554 *
555 * @rdev: radeon_device pointer
556 * @addr: the unmapped addr
557 *
558 * Look up the physical address of the page that the pte resolves
559 * to (cayman+).
560 * Returns the physical address of the page.
561 */
562uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
563{
564 uint64_t result;
565
566 /* page table offset */
567 result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
568
569 /* in case cpu page size != gpu page size*/
570 result |= addr & (~PAGE_MASK);
571
572 return result;
573}
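
As a worked example of the lookup above (assuming 4 KiB CPU pages, i.e. a PAGE_SHIFT of 12): for addr = 0x32840, pages_addr[0x32840 >> 12] = pages_addr[0x32] gives the DMA address of the backing system page, and OR-ing back addr & ~PAGE_MASK = 0x840 keeps the offset within that page, so the function returns pages_addr[0x32] | 0x840.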
574
575/**
576 * radeon_vm_page_flags - translate page flags to what the hw uses
577 *
578 * @flags: flags coming from userspace
579 *
580 * Translate the flags the userspace ABI uses to hw flags.
581 */
582static uint32_t radeon_vm_page_flags(uint32_t flags)
583{
584 uint32_t hw_flags = 0;
585 hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
586 hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
587 hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
588 if (flags & RADEON_VM_PAGE_SYSTEM) {
589 hw_flags |= R600_PTE_SYSTEM;
590 hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
591 }
592 return hw_flags;
593}
594
595/**
596 * radeon_vm_update_pdes - make sure that page directory is valid
597 *
598 * @rdev: radeon_device pointer
599 * @vm: requested vm
600 * @start: start of GPU address range
601 * @end: end of GPU address range
602 *
603 * Allocates new page tables if necessary
604 * and updates the page directory (cayman+).
605 * Returns 0 for success, error for failure.
606 *
607 * Global and local mutex must be locked!
608 */
609static int radeon_vm_update_pdes(struct radeon_device *rdev,
610 struct radeon_vm *vm,
611 struct radeon_ib *ib,
612 uint64_t start, uint64_t end)
613{
614 static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
615
616 uint64_t last_pde = ~0, last_pt = ~0;
617 unsigned count = 0;
618 uint64_t pt_idx;
619 int r;
620
621 start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
622 end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
623
624 /* walk over the address space and update the page directory */
625 for (pt_idx = start; pt_idx <= end; ++pt_idx) {
626 uint64_t pde, pt;
627
628 if (vm->page_tables[pt_idx])
629 continue;
630
631retry:
632 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
633 &vm->page_tables[pt_idx],
634 RADEON_VM_PTE_COUNT * 8,
635 RADEON_GPU_PAGE_SIZE, false);
636
637 if (r == -ENOMEM) {
638 r = radeon_vm_evict(rdev, vm);
639 if (r)
640 return r;
641 goto retry;
642 } else if (r) {
643 return r;
644 }
645
646 pde = vm->pd_gpu_addr + pt_idx * 8;
647
648 pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
649
650 if (((last_pde + 8 * count) != pde) ||
651 ((last_pt + incr * count) != pt)) {
652
653 if (count) {
654 radeon_asic_vm_set_page(rdev, ib, last_pde,
655 last_pt, count, incr,
656 R600_PTE_VALID);
657
658 count *= RADEON_VM_PTE_COUNT;
659 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
660 count, 0, 0);
661 }
662
663 count = 1;
664 last_pde = pde;
665 last_pt = pt;
666 } else {
667 ++count;
668 }
669 }
670
671 if (count) {
672 radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
673 incr, R600_PTE_VALID);
674
675 count *= RADEON_VM_PTE_COUNT;
676 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
677 count, 0, 0);
678 }
679
680 return 0;
681}
682
683/**
684 * radeon_vm_update_ptes - make sure that page tables are valid
685 *
686 * @rdev: radeon_device pointer
687 * @vm: requested vm
688 * @start: start of GPU address range
689 * @end: end of GPU address range
690 * @dst: destination address to map to
691 * @flags: mapping flags
692 *
693 * Update the page tables in the range @start - @end (cayman+).
694 *
695 * Global and local mutex must be locked!
696 */
697static void radeon_vm_update_ptes(struct radeon_device *rdev,
698 struct radeon_vm *vm,
699 struct radeon_ib *ib,
700 uint64_t start, uint64_t end,
701 uint64_t dst, uint32_t flags)
702{
703 static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
704
705 uint64_t last_pte = ~0, last_dst = ~0;
706 unsigned count = 0;
707 uint64_t addr;
708
709 start = start / RADEON_GPU_PAGE_SIZE;
710 end = end / RADEON_GPU_PAGE_SIZE;
711
712 /* walk over the address space and update the page tables */
713 for (addr = start; addr < end; ) {
714 uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
715 unsigned nptes;
716 uint64_t pte;
717
718 if ((addr & ~mask) == (end & ~mask))
719 nptes = end - addr;
720 else
721 nptes = RADEON_VM_PTE_COUNT - (addr & mask);
722
723 pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
724 pte += (addr & mask) * 8;
725
726 if ((last_pte + 8 * count) != pte) {
727
728 if (count) {
729 radeon_asic_vm_set_page(rdev, ib, last_pte,
730 last_dst, count,
731 RADEON_GPU_PAGE_SIZE,
732 flags);
733 }
734
735 count = nptes;
736 last_pte = pte;
737 last_dst = dst;
738 } else {
739 count += nptes;
740 }
741
742 addr += nptes;
743 dst += nptes * RADEON_GPU_PAGE_SIZE;
744 }
745
746 if (count) {
747 radeon_asic_vm_set_page(rdev, ib, last_pte,
748 last_dst, count,
749 RADEON_GPU_PAGE_SIZE, flags);
750 }
751}
752
753/**
754 * radeon_vm_bo_update - map a bo into the vm page table
755 *
756 * @rdev: radeon_device pointer
757 * @vm: requested vm
758 * @bo: radeon buffer object
759 * @mem: ttm mem
760 *
761 * Fill in the page table entries for @bo (cayman+).
762 * Returns 0 for success, -EINVAL for failure.
763 *
764 * Object has to be reserved & global and local mutex must be locked!
765 */
766int radeon_vm_bo_update(struct radeon_device *rdev,
767 struct radeon_vm *vm,
768 struct radeon_bo *bo,
769 struct ttm_mem_reg *mem)
770{
771 struct radeon_ib ib;
772 struct radeon_bo_va *bo_va;
773 unsigned nptes, npdes, ndw;
774 uint64_t addr;
775 int r;
776
777 /* nothing to do if vm isn't bound */
778 if (vm->page_directory == NULL)
779 return 0;
780
781 bo_va = radeon_vm_bo_find(vm, bo);
782 if (bo_va == NULL) {
783 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
784 return -EINVAL;
785 }
786
787 if (!bo_va->soffset) {
788 dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
789 bo, vm);
790 return -EINVAL;
791 }
792
793 if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
794 return 0;
795
796 bo_va->flags &= ~RADEON_VM_PAGE_VALID;
797 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
798 if (mem) {
799 addr = mem->start << PAGE_SHIFT;
800 if (mem->mem_type != TTM_PL_SYSTEM) {
801 bo_va->flags |= RADEON_VM_PAGE_VALID;
802 bo_va->valid = true;
803 }
804 if (mem->mem_type == TTM_PL_TT) {
805 bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
806 } else {
807 addr += rdev->vm_manager.vram_base_offset;
808 }
809 } else {
810 addr = 0;
811 bo_va->valid = false;
812 }
813
814 trace_radeon_vm_bo_update(bo_va);
815
816 nptes = radeon_bo_ngpu_pages(bo);
817
818 /* assume two extra pdes in case the mapping overlaps the borders */
819 npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
820
821 /* padding, etc. */
822 ndw = 64;
823
824 if (RADEON_VM_BLOCK_SIZE > 11)
825 /* reserve space for one header for every 2k dwords */
826 ndw += (nptes >> 11) * 4;
827 else
828 /* reserve space for one header for
829 every (1 << BLOCK_SIZE) entries */
830 ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
831
832 /* reserve space for pte addresses */
833 ndw += nptes * 2;
834
835 /* reserve space for one header for every 2k dwords */
836 ndw += (npdes >> 11) * 4;
837
838 /* reserve space for pde addresses */
839 ndw += npdes * 2;
840
841 /* reserve space for clearing new page tables */
842 ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
843
844 /* update too big for an IB */
845 if (ndw > 0xfffff)
846 return -ENOMEM;
847
848 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
849 if (r)
850 return r;
851 ib.length_dw = 0;
852
853 r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
854 if (r) {
855 radeon_ib_free(rdev, &ib);
856 return r;
857 }
858
859 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
860 addr, radeon_vm_page_flags(bo_va->flags));
861
862 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
863 r = radeon_ib_schedule(rdev, &ib, NULL);
864 if (r) {
865 radeon_ib_free(rdev, &ib);
866 return r;
867 }
868 radeon_fence_unref(&vm->fence);
869 vm->fence = radeon_fence_ref(ib.fence);
870 radeon_ib_free(rdev, &ib);
871 radeon_fence_unref(&vm->last_flush);
872
873 return 0;
874}
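
To make the ndw estimate above concrete, a sketch assuming RADEON_VM_BLOCK_SIZE is 9 (so RADEON_VM_PTE_COUNT is 512) and 4 KiB GPU pages: mapping a 1 MiB bo gives nptes = 256 and npdes = (256 >> 9) + 2 = 2, so ndw = 64 + (256 >> 9) * 4 + 256 * 2 + (2 >> 11) * 4 + 2 * 2 + 2 * 2 * 512 = 64 + 0 + 512 + 0 + 4 + 2048 = 2628 dwords, a little over 10 KiB of IB space and far below the 0xfffff limit.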
875
876/**
877 * radeon_vm_bo_rmv - remove a bo from a specific vm
878 *
879 * @rdev: radeon_device pointer
880 * @bo_va: requested bo_va
881 *
882 * Remove @bo_va->bo from the requested vm (cayman+).
883 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
884 * remove the ptes for @bo_va in the page table.
885 * Returns 0 for success.
886 *
887 * Object has to be reserved!
888 */
889int radeon_vm_bo_rmv(struct radeon_device *rdev,
890 struct radeon_bo_va *bo_va)
891{
892 int r = 0;
893
894 mutex_lock(&rdev->vm_manager.lock);
895 mutex_lock(&bo_va->vm->mutex);
896 if (bo_va->soffset) {
897 r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
898 }
899 mutex_unlock(&rdev->vm_manager.lock);
900 list_del(&bo_va->vm_list);
901 mutex_unlock(&bo_va->vm->mutex);
902 list_del(&bo_va->bo_list);
903
904 kfree(bo_va);
905 return r;
906}
907
908/**
909 * radeon_vm_bo_invalidate - mark the bo as invalid
910 *
911 * @rdev: radeon_device pointer
912 * @bo: radeon buffer object
913 *
914 *
915 * Mark @bo as invalid (cayman+).
916 */
917void radeon_vm_bo_invalidate(struct radeon_device *rdev,
918 struct radeon_bo *bo)
919{
920 struct radeon_bo_va *bo_va;
921
922 list_for_each_entry(bo_va, &bo->va, bo_list) {
923 bo_va->valid = false;
924 }
925}
926
927/**
928 * radeon_vm_init - initialize a vm instance
929 *
930 * @rdev: radeon_device pointer
931 * @vm: requested vm
932 *
933 * Init @vm fields (cayman+).
934 */
935void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
936{
937 vm->id = 0;
938 vm->fence = NULL;
939 vm->last_flush = NULL;
940 vm->last_id_use = NULL;
941 mutex_init(&vm->mutex);
942 INIT_LIST_HEAD(&vm->list);
943 INIT_LIST_HEAD(&vm->va);
944}
945
946/**
947 * radeon_vm_fini - tear down a vm instance
948 *
949 * @rdev: radeon_device pointer
950 * @vm: requested vm
951 *
952 * Tear down @vm (cayman+).
953 * Unbind the VM and remove all bos from the vm bo list
954 */
955void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
956{
957 struct radeon_bo_va *bo_va, *tmp;
958 int r;
959
960 mutex_lock(&rdev->vm_manager.lock);
961 mutex_lock(&vm->mutex);
962 radeon_vm_free_pt(rdev, vm);
963 mutex_unlock(&rdev->vm_manager.lock);
964
965 if (!list_empty(&vm->va)) {
966 dev_err(rdev->dev, "still active bo inside vm\n");
967 }
968 list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
969 list_del_init(&bo_va->vm_list);
970 r = radeon_bo_reserve(bo_va->bo, false);
971 if (!r) {
972 list_del_init(&bo_va->bo_list);
973 radeon_bo_unreserve(bo_va->bo);
974 kfree(bo_va);
975 }
976 }
977 radeon_fence_unref(&vm->fence);
978 radeon_fence_unref(&vm->last_flush);
979 radeon_fence_unref(&vm->last_id_use);
980 mutex_unlock(&vm->mutex);
981}
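
Taken together, the functions split out here cover the whole per-process VM lifecycle. A minimal sketch of the call order (bo, soffset and flags stand in for whatever the caller wants to map; the command submission code normally drives the middle steps with the proper locks held):

    struct radeon_vm vm;
    struct radeon_bo_va *bo_va;

    radeon_vm_init(rdev, &vm);                          /* empty vm: no id, fences or page directory yet */

    bo_va = radeon_vm_bo_add(rdev, &vm, bo);            /* bo has to be reserved */
    radeon_vm_bo_set_addr(rdev, bo_va, soffset, flags); /* claim a range in the vm address space */

    /* at command submission time, with vm_manager.lock and vm->mutex held:
     *   radeon_vm_alloc_pt(), radeon_vm_grab_id(), radeon_vm_bo_update(),
     *   radeon_vm_fence() and radeon_vm_add_to_lru()
     */

    radeon_vm_bo_rmv(rdev, bo_va);                      /* clear the mapping and free the bo_va */
    radeon_vm_fini(rdev, &vm);                          /* tear the vm down again */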