Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--	drivers/gpu/drm/radeon/Makefile        2
-rw-r--r--	drivers/gpu/drm/radeon/radeon_gart.c   958
-rw-r--r--	drivers/gpu/drm/radeon/radeon_vm.c     981
3 files changed, 982 insertions, 959 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index ed60caa32518..09433534dc47 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
80 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
81 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
82 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
83 	ci_dpm.o dce6_afmt.o
83 	ci_dpm.o dce6_afmt.o radeon_vm.o
84
85# add async DMA block
86radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a8f9b463bf2a..2e723651069b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -28,8 +28,6 @@
28#include <drm/drmP.h>
29#include <drm/radeon_drm.h>
30#include "radeon.h"
31#include "radeon_reg.h"
32#include "radeon_trace.h"
33
34/*
35 * GART
@@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev)
394
395 	radeon_dummy_page_fini(rdev);
396}
397
398/*
399 * GPUVM
400 * GPUVM is similar to the legacy gart on older asics, however
401 * rather than there being a single global gart table
402 * for the entire GPU, there are multiple VM page tables active
403 * at any given time. The VM page tables can contain a mix of
404 * vram pages and system memory pages; system memory pages
405 * can be mapped as snooped (cached system pages) or unsnooped
406 * (uncached system pages).
407 * Each VM has an ID associated with it and there is a page table
408 * associated with each VMID. When executing a command buffer,
409 * the kernel tells the ring what VMID to use for that command
410 * buffer. VMIDs are allocated dynamically as commands are submitted.
411 * The userspace drivers maintain their own address space and the kernel
412 * sets up their page tables accordingly when they submit their
413 * command buffers and a VMID is assigned.
414 * Cayman/Trinity support up to 8 active VMs at any given time;
415 * SI supports 16.
416 */
417
418/*
419 * vm helpers
420 *
421 * TODO bind a default page at vm initialization for default address
422 */
423
424/**
425 * radeon_vm_num_pdes - return the number of page directory entries
426 *
427 * @rdev: radeon_device pointer
428 *
429 * Calculate the number of page directory entries (cayman+).
430 */
431static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
432{
433 return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
434}
435
436/**
437 * radeon_vm_directory_size - returns the size of the page directory in bytes
438 *
439 * @rdev: radeon_device pointer
440 *
441 * Calculate the size of the page directory in bytes (cayman+).
442 */
443static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
444{
445 return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
446}
447
448/**
449 * radeon_vm_manager_init - init the vm manager
450 *
451 * @rdev: radeon_device pointer
452 *
453 * Init the vm manager (cayman+).
454 * Returns 0 for success, error for failure.
455 */
456int radeon_vm_manager_init(struct radeon_device *rdev)
457{
458 struct radeon_vm *vm;
459 struct radeon_bo_va *bo_va;
460 int r;
461 unsigned size;
462
463 if (!rdev->vm_manager.enabled) {
464 /* allocate enough for 2 full VM pts */
465 size = radeon_vm_directory_size(rdev);
466 size += rdev->vm_manager.max_pfn * 8;
467 size *= 2;
468 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
469 RADEON_GPU_PAGE_ALIGN(size),
470 RADEON_VM_PTB_ALIGN_SIZE,
471 RADEON_GEM_DOMAIN_VRAM);
472 if (r) {
473 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
474 (rdev->vm_manager.max_pfn * 8) >> 10);
475 return r;
476 }
477
478 r = radeon_asic_vm_init(rdev);
479 if (r)
480 return r;
481
482 rdev->vm_manager.enabled = true;
483
484 r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
485 if (r)
486 return r;
487 }
488
489 /* restore page table */
490 list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
491 if (vm->page_directory == NULL)
492 continue;
493
494 list_for_each_entry(bo_va, &vm->va, vm_list) {
495 bo_va->valid = false;
496 }
497 }
498 return 0;
499}
500
501/**
502 * radeon_vm_free_pt - free the page table for a specific vm
503 *
504 * @rdev: radeon_device pointer
505 * @vm: vm to unbind
506 *
507 * Free the page table of a specific vm (cayman+).
508 *
509 * Global and local mutex must be locked!
510 */
511static void radeon_vm_free_pt(struct radeon_device *rdev,
512 struct radeon_vm *vm)
513{
514 struct radeon_bo_va *bo_va;
515 int i;
516
517 if (!vm->page_directory)
518 return;
519
520 list_del_init(&vm->list);
521 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
522
523 list_for_each_entry(bo_va, &vm->va, vm_list) {
524 bo_va->valid = false;
525 }
526
527 if (vm->page_tables == NULL)
528 return;
529
530 for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
531 radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
532
533 kfree(vm->page_tables);
534}
535
536/**
537 * radeon_vm_manager_fini - tear down the vm manager
538 *
539 * @rdev: radeon_device pointer
540 *
541 * Tear down the VM manager (cayman+).
542 */
543void radeon_vm_manager_fini(struct radeon_device *rdev)
544{
545 struct radeon_vm *vm, *tmp;
546 int i;
547
548 if (!rdev->vm_manager.enabled)
549 return;
550
551 mutex_lock(&rdev->vm_manager.lock);
552 /* free all allocated page tables */
553 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
554 mutex_lock(&vm->mutex);
555 radeon_vm_free_pt(rdev, vm);
556 mutex_unlock(&vm->mutex);
557 }
558 for (i = 0; i < RADEON_NUM_VM; ++i) {
559 radeon_fence_unref(&rdev->vm_manager.active[i]);
560 }
561 radeon_asic_vm_fini(rdev);
562 mutex_unlock(&rdev->vm_manager.lock);
563
564 radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
565 radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
566 rdev->vm_manager.enabled = false;
567}
568
569/**
570 * radeon_vm_evict - evict page table to make room for new one
571 *
572 * @rdev: radeon_device pointer
573 * @vm: VM we want to allocate something for
574 *
575 * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
576 * Returns 0 for success, -ENOMEM for failure.
577 *
578 * Global and local mutex must be locked!
579 */
580static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
581{
582 struct radeon_vm *vm_evict;
583
584 if (list_empty(&rdev->vm_manager.lru_vm))
585 return -ENOMEM;
586
587 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
588 struct radeon_vm, list);
589 if (vm_evict == vm)
590 return -ENOMEM;
591
592 mutex_lock(&vm_evict->mutex);
593 radeon_vm_free_pt(rdev, vm_evict);
594 mutex_unlock(&vm_evict->mutex);
595 return 0;
596}
597
598/**
599 * radeon_vm_alloc_pt - allocates a page table for a VM
600 *
601 * @rdev: radeon_device pointer
602 * @vm: vm to bind
603 *
604 * Allocate a page table for the requested vm (cayman+).
605 * Returns 0 for success, error for failure.
606 *
607 * Global and local mutex must be locked!
608 */
609int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
610{
611 unsigned pd_size, pd_entries, pts_size;
612 struct radeon_ib ib;
613 int r;
614
615 if (vm == NULL) {
616 return -EINVAL;
617 }
618
619 if (vm->page_directory != NULL) {
620 return 0;
621 }
622
623 pd_size = radeon_vm_directory_size(rdev);
624 pd_entries = radeon_vm_num_pdes(rdev);
625
626retry:
627 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
628 &vm->page_directory, pd_size,
629 RADEON_VM_PTB_ALIGN_SIZE, false);
630 if (r == -ENOMEM) {
631 r = radeon_vm_evict(rdev, vm);
632 if (r)
633 return r;
634 goto retry;
635
636 } else if (r) {
637 return r;
638 }
639
640 vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
641
642 /* Initially clear the page directory */
643 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
644 NULL, pd_entries * 2 + 64);
645 if (r) {
646 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
647 return r;
648 }
649
650 ib.length_dw = 0;
651
652 radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
653 0, pd_entries, 0, 0);
654
655 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
656 r = radeon_ib_schedule(rdev, &ib, NULL);
657 if (r) {
658 radeon_ib_free(rdev, &ib);
659 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
660 return r;
661 }
662 radeon_fence_unref(&vm->fence);
663 vm->fence = radeon_fence_ref(ib.fence);
664 radeon_ib_free(rdev, &ib);
665 radeon_fence_unref(&vm->last_flush);
666
667 /* allocate page table array */
668 pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
669 vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
670
671 if (vm->page_tables == NULL) {
672 DRM_ERROR("Cannot allocate memory for page table array\n");
673 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
674 return -ENOMEM;
675 }
676
677 return 0;
678}
679
680/**
681 * radeon_vm_add_to_lru - add VMs page table to LRU list
682 *
683 * @rdev: radeon_device pointer
684 * @vm: vm to add to LRU
685 *
686 * Add the allocated page table to the LRU list (cayman+).
687 *
688 * Global mutex must be locked!
689 */
690void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
691{
692 list_del_init(&vm->list);
693 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
694}
695
696/**
697 * radeon_vm_grab_id - allocate the next free VMID
698 *
699 * @rdev: radeon_device pointer
700 * @vm: vm to allocate id for
701 * @ring: ring we want to submit job to
702 *
703 * Allocate an id for the vm (cayman+).
704 * Returns the fence we need to sync to (if any).
705 *
706 * Global and local mutex must be locked!
707 */
708struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
709 struct radeon_vm *vm, int ring)
710{
711 struct radeon_fence *best[RADEON_NUM_RINGS] = {};
712 unsigned choices[2] = {};
713 unsigned i;
714
715 /* check if the id is still valid */
716 if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
717 return NULL;
718
719 	/* we definitely need to flush */
720 radeon_fence_unref(&vm->last_flush);
721
722 /* skip over VMID 0, since it is the system VM */
723 for (i = 1; i < rdev->vm_manager.nvm; ++i) {
724 struct radeon_fence *fence = rdev->vm_manager.active[i];
725
726 if (fence == NULL) {
727 /* found a free one */
728 vm->id = i;
729 trace_radeon_vm_grab_id(vm->id, ring);
730 return NULL;
731 }
732
733 if (radeon_fence_is_earlier(fence, best[fence->ring])) {
734 best[fence->ring] = fence;
735 choices[fence->ring == ring ? 0 : 1] = i;
736 }
737 }
738
739 for (i = 0; i < 2; ++i) {
740 if (choices[i]) {
741 vm->id = choices[i];
742 trace_radeon_vm_grab_id(vm->id, ring);
743 return rdev->vm_manager.active[choices[i]];
744 }
745 }
746
747 /* should never happen */
748 BUG();
749 return NULL;
750}
751
752/**
753 * radeon_vm_fence - remember fence for vm
754 *
755 * @rdev: radeon_device pointer
756 * @vm: vm we want to fence
757 * @fence: fence to remember
758 *
759 * Fence the vm (cayman+).
760 * Set the fence used to protect page table and id.
761 *
762 * Global and local mutex must be locked!
763 */
764void radeon_vm_fence(struct radeon_device *rdev,
765 struct radeon_vm *vm,
766 struct radeon_fence *fence)
767{
768 radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
769 rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
770
771 radeon_fence_unref(&vm->fence);
772 vm->fence = radeon_fence_ref(fence);
773
774 radeon_fence_unref(&vm->last_id_use);
775 vm->last_id_use = radeon_fence_ref(fence);
776}
777
778/**
779 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
780 *
781 * @vm: requested vm
782 * @bo: requested buffer object
783 *
784 * Find @bo inside the requested vm (cayman+).
785 * Search inside the @bos vm list for the requested vm
786 * Returns the found bo_va or NULL if none is found
787 *
788 * Object has to be reserved!
789 */
790struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
791 struct radeon_bo *bo)
792{
793 struct radeon_bo_va *bo_va;
794
795 list_for_each_entry(bo_va, &bo->va, bo_list) {
796 if (bo_va->vm == vm) {
797 return bo_va;
798 }
799 }
800 return NULL;
801}
802
803/**
804 * radeon_vm_bo_add - add a bo to a specific vm
805 *
806 * @rdev: radeon_device pointer
807 * @vm: requested vm
808 * @bo: radeon buffer object
809 *
810 * Add @bo into the requested vm (cayman+).
811 * Add @bo to the list of bos associated with the vm
812 * Returns newly added bo_va or NULL for failure
813 *
814 * Object has to be reserved!
815 */
816struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
817 struct radeon_vm *vm,
818 struct radeon_bo *bo)
819{
820 struct radeon_bo_va *bo_va;
821
822 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
823 if (bo_va == NULL) {
824 return NULL;
825 }
826 bo_va->vm = vm;
827 bo_va->bo = bo;
828 bo_va->soffset = 0;
829 bo_va->eoffset = 0;
830 bo_va->flags = 0;
831 bo_va->valid = false;
832 bo_va->ref_count = 1;
833 INIT_LIST_HEAD(&bo_va->bo_list);
834 INIT_LIST_HEAD(&bo_va->vm_list);
835
836 mutex_lock(&vm->mutex);
837 list_add(&bo_va->vm_list, &vm->va);
838 list_add_tail(&bo_va->bo_list, &bo->va);
839 mutex_unlock(&vm->mutex);
840
841 return bo_va;
842}
843
844/**
845 * radeon_vm_bo_set_addr - set bos virtual address inside a vm
846 *
847 * @rdev: radeon_device pointer
848 * @bo_va: bo_va to store the address
849 * @soffset: requested offset of the buffer in the VM address space
850 * @flags: attributes of pages (read/write/valid/etc.)
851 *
852 * Set offset of @bo_va (cayman+).
853 * Validate and set the offset requested within the vm address space.
854 * Returns 0 for success, error for failure.
855 *
856 * Object has to be reserved!
857 */
858int radeon_vm_bo_set_addr(struct radeon_device *rdev,
859 struct radeon_bo_va *bo_va,
860 uint64_t soffset,
861 uint32_t flags)
862{
863 uint64_t size = radeon_bo_size(bo_va->bo);
864 uint64_t eoffset, last_offset = 0;
865 struct radeon_vm *vm = bo_va->vm;
866 struct radeon_bo_va *tmp;
867 struct list_head *head;
868 unsigned last_pfn;
869
870 if (soffset) {
871 /* make sure object fit at this offset */
872 eoffset = soffset + size;
873 if (soffset >= eoffset) {
874 return -EINVAL;
875 }
876
877 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
878 if (last_pfn > rdev->vm_manager.max_pfn) {
879 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
880 last_pfn, rdev->vm_manager.max_pfn);
881 return -EINVAL;
882 }
883
884 } else {
885 eoffset = last_pfn = 0;
886 }
887
888 mutex_lock(&vm->mutex);
889 head = &vm->va;
890 last_offset = 0;
891 list_for_each_entry(tmp, &vm->va, vm_list) {
892 if (bo_va == tmp) {
893 /* skip over currently modified bo */
894 continue;
895 }
896
897 if (soffset >= last_offset && eoffset <= tmp->soffset) {
898 /* bo can be added before this one */
899 break;
900 }
901 if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
902 /* bo and tmp overlap, invalid offset */
903 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
904 bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
905 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
906 mutex_unlock(&vm->mutex);
907 return -EINVAL;
908 }
909 last_offset = tmp->eoffset;
910 head = &tmp->vm_list;
911 }
912
913 bo_va->soffset = soffset;
914 bo_va->eoffset = eoffset;
915 bo_va->flags = flags;
916 bo_va->valid = false;
917 list_move(&bo_va->vm_list, head);
918
919 mutex_unlock(&vm->mutex);
920 return 0;
921}
922
923/**
924 * radeon_vm_map_gart - get the physical address of a gart page
925 *
926 * @rdev: radeon_device pointer
927 * @addr: the unmapped addr
928 *
929 * Look up the physical address of the page that the pte resolves
930 * to (cayman+).
931 * Returns the physical address of the page.
932 */
933uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
934{
935 uint64_t result;
936
937 /* page table offset */
938 result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
939
940 /* in case cpu page size != gpu page size*/
941 result |= addr & (~PAGE_MASK);
942
943 return result;
944}
945
946/**
947 * radeon_vm_page_flags - translate page flags to what the hw uses
948 *
949 * @flags: flags coming from userspace
950 *
951 * Translate the flags the userspace ABI uses to hw flags.
952 */
953static uint32_t radeon_vm_page_flags(uint32_t flags)
954{
955 uint32_t hw_flags = 0;
956 hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
957 hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
958 hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
959 if (flags & RADEON_VM_PAGE_SYSTEM) {
960 hw_flags |= R600_PTE_SYSTEM;
961 hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
962 }
963 return hw_flags;
964}
965
966/**
967 * radeon_vm_update_pdes - make sure that page directory is valid
968 *
969 * @rdev: radeon_device pointer
970 * @vm: requested vm
971 * @start: start of GPU address range
972 * @end: end of GPU address range
973 *
974 * Allocates new page tables if necessary
975 * and updates the page directory (cayman+).
976 * Returns 0 for success, error for failure.
977 *
978 * Global and local mutex must be locked!
979 */
980static int radeon_vm_update_pdes(struct radeon_device *rdev,
981 struct radeon_vm *vm,
982 struct radeon_ib *ib,
983 uint64_t start, uint64_t end)
984{
985 static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
986
987 uint64_t last_pde = ~0, last_pt = ~0;
988 unsigned count = 0;
989 uint64_t pt_idx;
990 int r;
991
992 start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
993 end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
994
995 /* walk over the address space and update the page directory */
996 for (pt_idx = start; pt_idx <= end; ++pt_idx) {
997 uint64_t pde, pt;
998
999 if (vm->page_tables[pt_idx])
1000 continue;
1001
1002retry:
1003 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
1004 &vm->page_tables[pt_idx],
1005 RADEON_VM_PTE_COUNT * 8,
1006 RADEON_GPU_PAGE_SIZE, false);
1007
1008 if (r == -ENOMEM) {
1009 r = radeon_vm_evict(rdev, vm);
1010 if (r)
1011 return r;
1012 goto retry;
1013 } else if (r) {
1014 return r;
1015 }
1016
1017 pde = vm->pd_gpu_addr + pt_idx * 8;
1018
1019 pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
1020
1021 if (((last_pde + 8 * count) != pde) ||
1022 ((last_pt + incr * count) != pt)) {
1023
1024 if (count) {
1025 radeon_asic_vm_set_page(rdev, ib, last_pde,
1026 last_pt, count, incr,
1027 R600_PTE_VALID);
1028
1029 count *= RADEON_VM_PTE_COUNT;
1030 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
1031 count, 0, 0);
1032 }
1033
1034 count = 1;
1035 last_pde = pde;
1036 last_pt = pt;
1037 } else {
1038 ++count;
1039 }
1040 }
1041
1042 if (count) {
1043 radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
1044 incr, R600_PTE_VALID);
1045
1046 count *= RADEON_VM_PTE_COUNT;
1047 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
1048 count, 0, 0);
1049 }
1050
1051 return 0;
1052}
1053
1054/**
1055 * radeon_vm_update_ptes - make sure that page tables are valid
1056 *
1057 * @rdev: radeon_device pointer
1058 * @vm: requested vm
1059 * @start: start of GPU address range
1060 * @end: end of GPU address range
1061 * @dst: destination address to map to
1062 * @flags: mapping flags
1063 *
1064 * Update the page tables in the range @start - @end (cayman+).
1065 *
1066 * Global and local mutex must be locked!
1067 */
1068static void radeon_vm_update_ptes(struct radeon_device *rdev,
1069 struct radeon_vm *vm,
1070 struct radeon_ib *ib,
1071 uint64_t start, uint64_t end,
1072 uint64_t dst, uint32_t flags)
1073{
1074 static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
1075
1076 uint64_t last_pte = ~0, last_dst = ~0;
1077 unsigned count = 0;
1078 uint64_t addr;
1079
1080 start = start / RADEON_GPU_PAGE_SIZE;
1081 end = end / RADEON_GPU_PAGE_SIZE;
1082
1083 /* walk over the address space and update the page tables */
1084 for (addr = start; addr < end; ) {
1085 uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
1086 unsigned nptes;
1087 uint64_t pte;
1088
1089 if ((addr & ~mask) == (end & ~mask))
1090 nptes = end - addr;
1091 else
1092 nptes = RADEON_VM_PTE_COUNT - (addr & mask);
1093
1094 pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
1095 pte += (addr & mask) * 8;
1096
1097 if ((last_pte + 8 * count) != pte) {
1098
1099 if (count) {
1100 radeon_asic_vm_set_page(rdev, ib, last_pte,
1101 last_dst, count,
1102 RADEON_GPU_PAGE_SIZE,
1103 flags);
1104 }
1105
1106 count = nptes;
1107 last_pte = pte;
1108 last_dst = dst;
1109 } else {
1110 count += nptes;
1111 }
1112
1113 addr += nptes;
1114 dst += nptes * RADEON_GPU_PAGE_SIZE;
1115 }
1116
1117 if (count) {
1118 radeon_asic_vm_set_page(rdev, ib, last_pte,
1119 last_dst, count,
1120 RADEON_GPU_PAGE_SIZE, flags);
1121 }
1122}
1123
1124/**
1125 * radeon_vm_bo_update - map a bo into the vm page table
1126 *
1127 * @rdev: radeon_device pointer
1128 * @vm: requested vm
1129 * @bo: radeon buffer object
1130 * @mem: ttm mem
1131 *
1132 * Fill in the page table entries for @bo (cayman+).
1133 * Returns 0 for success, -EINVAL for failure.
1134 *
1135 * Object has to be reserved & global and local mutex must be locked!
1136 */
1137int radeon_vm_bo_update(struct radeon_device *rdev,
1138 struct radeon_vm *vm,
1139 struct radeon_bo *bo,
1140 struct ttm_mem_reg *mem)
1141{
1142 struct radeon_ib ib;
1143 struct radeon_bo_va *bo_va;
1144 unsigned nptes, npdes, ndw;
1145 uint64_t addr;
1146 int r;
1147
1148 /* nothing to do if vm isn't bound */
1149 if (vm->page_directory == NULL)
1150 return 0;
1151
1152 bo_va = radeon_vm_bo_find(vm, bo);
1153 if (bo_va == NULL) {
1154 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
1155 return -EINVAL;
1156 }
1157
1158 if (!bo_va->soffset) {
1159 		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
1160 bo, vm);
1161 return -EINVAL;
1162 }
1163
1164 if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
1165 return 0;
1166
1167 bo_va->flags &= ~RADEON_VM_PAGE_VALID;
1168 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
1169 if (mem) {
1170 addr = mem->start << PAGE_SHIFT;
1171 if (mem->mem_type != TTM_PL_SYSTEM) {
1172 bo_va->flags |= RADEON_VM_PAGE_VALID;
1173 bo_va->valid = true;
1174 }
1175 if (mem->mem_type == TTM_PL_TT) {
1176 bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
1177 } else {
1178 addr += rdev->vm_manager.vram_base_offset;
1179 }
1180 } else {
1181 addr = 0;
1182 bo_va->valid = false;
1183 }
1184
1185 trace_radeon_vm_bo_update(bo_va);
1186
1187 nptes = radeon_bo_ngpu_pages(bo);
1188
1189 /* assume two extra pdes in case the mapping overlaps the borders */
1190 npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
1191
1192 /* padding, etc. */
1193 ndw = 64;
1194
1195 if (RADEON_VM_BLOCK_SIZE > 11)
1196 /* reserve space for one header for every 2k dwords */
1197 ndw += (nptes >> 11) * 4;
1198 else
1199 /* reserve space for one header for
1200 every (1 << BLOCK_SIZE) entries */
1201 ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
1202
1203 /* reserve space for pte addresses */
1204 ndw += nptes * 2;
1205
1206 /* reserve space for one header for every 2k dwords */
1207 ndw += (npdes >> 11) * 4;
1208
1209 /* reserve space for pde addresses */
1210 ndw += npdes * 2;
1211
1212 /* reserve space for clearing new page tables */
1213 ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
1214
1215 /* update too big for an IB */
1216 if (ndw > 0xfffff)
1217 return -ENOMEM;
1218
1219 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
1220 if (r)
1221 return r;
1222 ib.length_dw = 0;
1223
1224 r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
1225 if (r) {
1226 radeon_ib_free(rdev, &ib);
1227 return r;
1228 }
1229
1230 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
1231 addr, radeon_vm_page_flags(bo_va->flags));
1232
1233 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
1234 r = radeon_ib_schedule(rdev, &ib, NULL);
1235 if (r) {
1236 radeon_ib_free(rdev, &ib);
1237 return r;
1238 }
1239 radeon_fence_unref(&vm->fence);
1240 vm->fence = radeon_fence_ref(ib.fence);
1241 radeon_ib_free(rdev, &ib);
1242 radeon_fence_unref(&vm->last_flush);
1243
1244 return 0;
1245}
1246
1247/**
1248 * radeon_vm_bo_rmv - remove a bo from a specific vm
1249 *
1250 * @rdev: radeon_device pointer
1251 * @bo_va: requested bo_va
1252 *
1253 * Remove @bo_va->bo from the requested vm (cayman+).
1254 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
1255 * remove the ptes for @bo_va in the page table.
1256 * Returns 0 for success.
1257 *
1258 * Object has to be reserved!
1259 */
1260int radeon_vm_bo_rmv(struct radeon_device *rdev,
1261 struct radeon_bo_va *bo_va)
1262{
1263 int r = 0;
1264
1265 mutex_lock(&rdev->vm_manager.lock);
1266 mutex_lock(&bo_va->vm->mutex);
1267 if (bo_va->soffset) {
1268 r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
1269 }
1270 mutex_unlock(&rdev->vm_manager.lock);
1271 list_del(&bo_va->vm_list);
1272 mutex_unlock(&bo_va->vm->mutex);
1273 list_del(&bo_va->bo_list);
1274
1275 kfree(bo_va);
1276 return r;
1277}
1278
1279/**
1280 * radeon_vm_bo_invalidate - mark the bo as invalid
1281 *
1282 * @rdev: radeon_device pointer
1283 * @vm: requested vm
1284 * @bo: radeon buffer object
1285 *
1286 * Mark @bo as invalid (cayman+).
1287 */
1288void radeon_vm_bo_invalidate(struct radeon_device *rdev,
1289 struct radeon_bo *bo)
1290{
1291 struct radeon_bo_va *bo_va;
1292
1293 list_for_each_entry(bo_va, &bo->va, bo_list) {
1294 bo_va->valid = false;
1295 }
1296}
1297
1298/**
1299 * radeon_vm_init - initialize a vm instance
1300 *
1301 * @rdev: radeon_device pointer
1302 * @vm: requested vm
1303 *
1304 * Init @vm fields (cayman+).
1305 */
1306void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
1307{
1308 vm->id = 0;
1309 vm->fence = NULL;
1310 vm->last_flush = NULL;
1311 vm->last_id_use = NULL;
1312 mutex_init(&vm->mutex);
1313 INIT_LIST_HEAD(&vm->list);
1314 INIT_LIST_HEAD(&vm->va);
1315}
1316
1317/**
1318 * radeon_vm_fini - tear down a vm instance
1319 *
1320 * @rdev: radeon_device pointer
1321 * @vm: requested vm
1322 *
1323 * Tear down @vm (cayman+).
1324 * Unbind the VM and remove all bos from the vm bo list
1325 */
1326void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
1327{
1328 struct radeon_bo_va *bo_va, *tmp;
1329 int r;
1330
1331 mutex_lock(&rdev->vm_manager.lock);
1332 mutex_lock(&vm->mutex);
1333 radeon_vm_free_pt(rdev, vm);
1334 mutex_unlock(&rdev->vm_manager.lock);
1335
1336 if (!list_empty(&vm->va)) {
1337 dev_err(rdev->dev, "still active bo inside vm\n");
1338 }
1339 list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
1340 list_del_init(&bo_va->vm_list);
1341 r = radeon_bo_reserve(bo_va->bo, false);
1342 if (!r) {
1343 list_del_init(&bo_va->bo_list);
1344 radeon_bo_unreserve(bo_va->bo);
1345 kfree(bo_va);
1346 }
1347 }
1348 radeon_fence_unref(&vm->fence);
1349 radeon_fence_unref(&vm->last_flush);
1350 radeon_fence_unref(&vm->last_id_use);
1351 mutex_unlock(&vm->mutex);
1352}
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
new file mode 100644
index 000000000000..433b1ebd07ea
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -0,0 +1,981 @@
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28#include <drm/drmP.h>
29#include <drm/radeon_drm.h>
30#include "radeon.h"
31#include "radeon_trace.h"
32
33/*
34 * GPUVM
35 * GPUVM is similar to the legacy gart on older asics, however
36 * rather than there being a single global gart table
37 * for the entire GPU, there are multiple VM page tables active
38 * at any given time. The VM page tables can contain a mix of
39 * vram pages and system memory pages; system memory pages
40 * can be mapped as snooped (cached system pages) or unsnooped
41 * (uncached system pages).
42 * Each VM has an ID associated with it and there is a page table
43 * associated with each VMID. When executing a command buffer,
44 * the kernel tells the ring what VMID to use for that command
45 * buffer. VMIDs are allocated dynamically as commands are submitted.
46 * The userspace drivers maintain their own address space and the kernel
47 * sets up their page tables accordingly when they submit their
48 * command buffers and a VMID is assigned.
49 * Cayman/Trinity support up to 8 active VMs at any given time;
50 * SI supports 16.
51 */
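/*
 * A worked example of the address split used by radeon_vm_update_pdes()
 * and radeon_vm_update_ptes() below; this is only a sketch and assumes
 * the usual constants RADEON_GPU_PAGE_SIZE = 4096 and
 * RADEON_VM_BLOCK_SIZE = 9 (i.e. RADEON_VM_PTE_COUNT = 512 entries per
 * page table), which are defined outside this file:
 *
 *   va      = 0x200000 (2 MiB into the VM address space)
 *   pfn     = va / RADEON_GPU_PAGE_SIZE       = 512
 *   pt_idx  = pfn >> RADEON_VM_BLOCK_SIZE     = 1   (second page table)
 *   pte_idx = pfn & (RADEON_VM_PTE_COUNT - 1) = 0
 *
 *   pde location = vm->pd_gpu_addr + pt_idx * 8
 *   pte location = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx])
 *                  + pte_idx * 8
 */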
52
53/**
54 * radeon_vm_num_pdes - return the number of page directory entries
55 *
56 * @rdev: radeon_device pointer
57 *
58 * Calculate the number of page directory entries (cayman+).
59 */
60static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
61{
62 return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
63}
64
65/**
66 * radeon_vm_directory_size - returns the size of the page directory in bytes
67 *
68 * @rdev: radeon_device pointer
69 *
70 * Calculate the size of the page directory in bytes (cayman+).
71 */
72static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
73{
74 return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
75}
76
77/**
78 * radeon_vm_manager_init - init the vm manager
79 *
80 * @rdev: radeon_device pointer
81 *
82 * Init the vm manager (cayman+).
83 * Returns 0 for success, error for failure.
84 */
85int radeon_vm_manager_init(struct radeon_device *rdev)
86{
87 struct radeon_vm *vm;
88 struct radeon_bo_va *bo_va;
89 int r;
90 unsigned size;
91
92 if (!rdev->vm_manager.enabled) {
93 /* allocate enough for 2 full VM pts */
94 size = radeon_vm_directory_size(rdev);
95 size += rdev->vm_manager.max_pfn * 8;
96 size *= 2;
97 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
98 RADEON_GPU_PAGE_ALIGN(size),
99 RADEON_VM_PTB_ALIGN_SIZE,
100 RADEON_GEM_DOMAIN_VRAM);
101 if (r) {
102 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
103 (rdev->vm_manager.max_pfn * 8) >> 10);
104 return r;
105 }
106
107 r = radeon_asic_vm_init(rdev);
108 if (r)
109 return r;
110
111 rdev->vm_manager.enabled = true;
112
113 r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
114 if (r)
115 return r;
116 }
117
118 /* restore page table */
119 list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
120 if (vm->page_directory == NULL)
121 continue;
122
123 list_for_each_entry(bo_va, &vm->va, vm_list) {
124 bo_va->valid = false;
125 }
126 }
127 return 0;
128}
129
130/**
131 * radeon_vm_free_pt - free the page table for a specific vm
132 *
133 * @rdev: radeon_device pointer
134 * @vm: vm to unbind
135 *
136 * Free the page table of a specific vm (cayman+).
137 *
138 * Global and local mutex must be locked!
139 */
140static void radeon_vm_free_pt(struct radeon_device *rdev,
141 struct radeon_vm *vm)
142{
143 struct radeon_bo_va *bo_va;
144 int i;
145
146 if (!vm->page_directory)
147 return;
148
149 list_del_init(&vm->list);
150 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
151
152 list_for_each_entry(bo_va, &vm->va, vm_list) {
153 bo_va->valid = false;
154 }
155
156 if (vm->page_tables == NULL)
157 return;
158
159 for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
160 radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
161
162 kfree(vm->page_tables);
163}
164
165/**
166 * radeon_vm_manager_fini - tear down the vm manager
167 *
168 * @rdev: radeon_device pointer
169 *
170 * Tear down the VM manager (cayman+).
171 */
172void radeon_vm_manager_fini(struct radeon_device *rdev)
173{
174 struct radeon_vm *vm, *tmp;
175 int i;
176
177 if (!rdev->vm_manager.enabled)
178 return;
179
180 mutex_lock(&rdev->vm_manager.lock);
181 /* free all allocated page tables */
182 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
183 mutex_lock(&vm->mutex);
184 radeon_vm_free_pt(rdev, vm);
185 mutex_unlock(&vm->mutex);
186 }
187 for (i = 0; i < RADEON_NUM_VM; ++i) {
188 radeon_fence_unref(&rdev->vm_manager.active[i]);
189 }
190 radeon_asic_vm_fini(rdev);
191 mutex_unlock(&rdev->vm_manager.lock);
192
193 radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
194 radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
195 rdev->vm_manager.enabled = false;
196}
197
198/**
199 * radeon_vm_evict - evict page table to make room for new one
200 *
201 * @rdev: radeon_device pointer
202 * @vm: VM we want to allocate something for
203 *
204 * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
205 * Returns 0 for success, -ENOMEM for failure.
206 *
207 * Global and local mutex must be locked!
208 */
209static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
210{
211 struct radeon_vm *vm_evict;
212
213 if (list_empty(&rdev->vm_manager.lru_vm))
214 return -ENOMEM;
215
216 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
217 struct radeon_vm, list);
218 if (vm_evict == vm)
219 return -ENOMEM;
220
221 mutex_lock(&vm_evict->mutex);
222 radeon_vm_free_pt(rdev, vm_evict);
223 mutex_unlock(&vm_evict->mutex);
224 return 0;
225}
226
227/**
228 * radeon_vm_alloc_pt - allocates a page table for a VM
229 *
230 * @rdev: radeon_device pointer
231 * @vm: vm to bind
232 *
233 * Allocate a page table for the requested vm (cayman+).
234 * Returns 0 for success, error for failure.
235 *
236 * Global and local mutex must be locked!
237 */
238int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
239{
240 unsigned pd_size, pd_entries, pts_size;
241 struct radeon_ib ib;
242 int r;
243
244 if (vm == NULL) {
245 return -EINVAL;
246 }
247
248 if (vm->page_directory != NULL) {
249 return 0;
250 }
251
252 pd_size = radeon_vm_directory_size(rdev);
253 pd_entries = radeon_vm_num_pdes(rdev);
254
255retry:
256 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
257 &vm->page_directory, pd_size,
258 RADEON_VM_PTB_ALIGN_SIZE, false);
259 if (r == -ENOMEM) {
260 r = radeon_vm_evict(rdev, vm);
261 if (r)
262 return r;
263 goto retry;
264
265 } else if (r) {
266 return r;
267 }
268
269 vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
270
271 /* Initially clear the page directory */
272 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
273 NULL, pd_entries * 2 + 64);
274 if (r) {
275 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
276 return r;
277 }
278
279 ib.length_dw = 0;
280
281 radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
282 0, pd_entries, 0, 0);
283
284 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
285 r = radeon_ib_schedule(rdev, &ib, NULL);
286 if (r) {
287 radeon_ib_free(rdev, &ib);
288 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
289 return r;
290 }
291 radeon_fence_unref(&vm->fence);
292 vm->fence = radeon_fence_ref(ib.fence);
293 radeon_ib_free(rdev, &ib);
294 radeon_fence_unref(&vm->last_flush);
295
296 /* allocate page table array */
297 pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
298 vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
299
300 if (vm->page_tables == NULL) {
301 DRM_ERROR("Cannot allocate memory for page table array\n");
302 radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
303 return -ENOMEM;
304 }
305
306 return 0;
307}
308
309/**
310 * radeon_vm_add_to_lru - add VMs page table to LRU list
311 *
312 * @rdev: radeon_device pointer
313 * @vm: vm to add to LRU
314 *
315 * Add the allocated page table to the LRU list (cayman+).
316 *
317 * Global mutex must be locked!
318 */
319void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
320{
321 list_del_init(&vm->list);
322 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
323}
324
325/**
326 * radeon_vm_grab_id - allocate the next free VMID
327 *
328 * @rdev: radeon_device pointer
329 * @vm: vm to allocate id for
330 * @ring: ring we want to submit job to
331 *
332 * Allocate an id for the vm (cayman+).
333 * Returns the fence we need to sync to (if any).
334 *
335 * Global and local mutex must be locked!
336 */
337struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
338 struct radeon_vm *vm, int ring)
339{
340 struct radeon_fence *best[RADEON_NUM_RINGS] = {};
341 unsigned choices[2] = {};
342 unsigned i;
343
344 /* check if the id is still valid */
345 if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
346 return NULL;
347
349 	/* we definitely need to flush */
349 radeon_fence_unref(&vm->last_flush);
350
351 /* skip over VMID 0, since it is the system VM */
352 for (i = 1; i < rdev->vm_manager.nvm; ++i) {
353 struct radeon_fence *fence = rdev->vm_manager.active[i];
354
355 if (fence == NULL) {
356 /* found a free one */
357 vm->id = i;
358 trace_radeon_vm_grab_id(vm->id, ring);
359 return NULL;
360 }
361
362 if (radeon_fence_is_earlier(fence, best[fence->ring])) {
363 best[fence->ring] = fence;
364 choices[fence->ring == ring ? 0 : 1] = i;
365 }
366 }
367
368 for (i = 0; i < 2; ++i) {
369 if (choices[i]) {
370 vm->id = choices[i];
371 trace_radeon_vm_grab_id(vm->id, ring);
372 return rdev->vm_manager.active[choices[i]];
373 }
374 }
375
376 /* should never happen */
377 BUG();
378 return NULL;
379}
380
381/**
382 * radeon_vm_fence - remember fence for vm
383 *
384 * @rdev: radeon_device pointer
385 * @vm: vm we want to fence
386 * @fence: fence to remember
387 *
388 * Fence the vm (cayman+).
389 * Set the fence used to protect page table and id.
390 *
391 * Global and local mutex must be locked!
392 */
393void radeon_vm_fence(struct radeon_device *rdev,
394 struct radeon_vm *vm,
395 struct radeon_fence *fence)
396{
397 radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
398 rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
399
400 radeon_fence_unref(&vm->fence);
401 vm->fence = radeon_fence_ref(fence);
402
403 radeon_fence_unref(&vm->last_id_use);
404 vm->last_id_use = radeon_fence_ref(fence);
405}
406
407/**
408 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
409 *
410 * @vm: requested vm
411 * @bo: requested buffer object
412 *
413 * Find @bo inside the requested vm (cayman+).
414 * Search inside the @bos vm list for the requested vm
415 * Returns the found bo_va or NULL if none is found
416 *
417 * Object has to be reserved!
418 */
419struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
420 struct radeon_bo *bo)
421{
422 struct radeon_bo_va *bo_va;
423
424 list_for_each_entry(bo_va, &bo->va, bo_list) {
425 if (bo_va->vm == vm) {
426 return bo_va;
427 }
428 }
429 return NULL;
430}
431
432/**
433 * radeon_vm_bo_add - add a bo to a specific vm
434 *
435 * @rdev: radeon_device pointer
436 * @vm: requested vm
437 * @bo: radeon buffer object
438 *
439 * Add @bo into the requested vm (cayman+).
440 * Add @bo to the list of bos associated with the vm
441 * Returns newly added bo_va or NULL for failure
442 *
443 * Object has to be reserved!
444 */
445struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
446 struct radeon_vm *vm,
447 struct radeon_bo *bo)
448{
449 struct radeon_bo_va *bo_va;
450
451 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
452 if (bo_va == NULL) {
453 return NULL;
454 }
455 bo_va->vm = vm;
456 bo_va->bo = bo;
457 bo_va->soffset = 0;
458 bo_va->eoffset = 0;
459 bo_va->flags = 0;
460 bo_va->valid = false;
461 bo_va->ref_count = 1;
462 INIT_LIST_HEAD(&bo_va->bo_list);
463 INIT_LIST_HEAD(&bo_va->vm_list);
464
465 mutex_lock(&vm->mutex);
466 list_add(&bo_va->vm_list, &vm->va);
467 list_add_tail(&bo_va->bo_list, &bo->va);
468 mutex_unlock(&vm->mutex);
469
470 return bo_va;
471}
472
473/**
474 * radeon_vm_bo_set_addr - set bos virtual address inside a vm
475 *
476 * @rdev: radeon_device pointer
477 * @bo_va: bo_va to store the address
478 * @soffset: requested offset of the buffer in the VM address space
479 * @flags: attributes of pages (read/write/valid/etc.)
480 *
481 * Set offset of @bo_va (cayman+).
482 * Validate and set the offset requested within the vm address space.
483 * Returns 0 for success, error for failure.
484 *
485 * Object has to be reserved!
486 */
487int radeon_vm_bo_set_addr(struct radeon_device *rdev,
488 struct radeon_bo_va *bo_va,
489 uint64_t soffset,
490 uint32_t flags)
491{
492 uint64_t size = radeon_bo_size(bo_va->bo);
493 uint64_t eoffset, last_offset = 0;
494 struct radeon_vm *vm = bo_va->vm;
495 struct radeon_bo_va *tmp;
496 struct list_head *head;
497 unsigned last_pfn;
498
499 if (soffset) {
500 /* make sure object fit at this offset */
501 eoffset = soffset + size;
502 if (soffset >= eoffset) {
503 return -EINVAL;
504 }
505
506 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
507 if (last_pfn > rdev->vm_manager.max_pfn) {
508 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
509 last_pfn, rdev->vm_manager.max_pfn);
510 return -EINVAL;
511 }
512
513 } else {
514 eoffset = last_pfn = 0;
515 }
516
517 mutex_lock(&vm->mutex);
518 head = &vm->va;
519 last_offset = 0;
520 list_for_each_entry(tmp, &vm->va, vm_list) {
521 if (bo_va == tmp) {
522 /* skip over currently modified bo */
523 continue;
524 }
525
526 if (soffset >= last_offset && eoffset <= tmp->soffset) {
527 /* bo can be added before this one */
528 break;
529 }
530 if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
531 /* bo and tmp overlap, invalid offset */
532 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
533 bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
534 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
535 mutex_unlock(&vm->mutex);
536 return -EINVAL;
537 }
538 last_offset = tmp->eoffset;
539 head = &tmp->vm_list;
540 }
541
542 bo_va->soffset = soffset;
543 bo_va->eoffset = eoffset;
544 bo_va->flags = flags;
545 bo_va->valid = false;
546 list_move(&bo_va->vm_list, head);
547
548 mutex_unlock(&vm->mutex);
549 return 0;
550}
551
552/**
553 * radeon_vm_map_gart - get the physical address of a gart page
554 *
555 * @rdev: radeon_device pointer
556 * @addr: the unmapped addr
557 *
558 * Look up the physical address of the page that the pte resolves
559 * to (cayman+).
560 * Returns the physical address of the page.
561 */
562uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
563{
564 uint64_t result;
565
566 /* page table offset */
567 result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
568
569 /* in case cpu page size != gpu page size*/
570 result |= addr & (~PAGE_MASK);
571
572 return result;
573}
574
575/**
576 * radeon_vm_page_flags - translate page flags to what the hw uses
577 *
578 * @flags: flags coming from userspace
579 *
580 * Translate the flags the userspace ABI uses to hw flags.
581 */
582static uint32_t radeon_vm_page_flags(uint32_t flags)
583{
584 uint32_t hw_flags = 0;
585 hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
586 hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
587 hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
588 if (flags & RADEON_VM_PAGE_SYSTEM) {
589 hw_flags |= R600_PTE_SYSTEM;
590 hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
591 }
592 return hw_flags;
593}
594
595/**
596 * radeon_vm_update_pdes - make sure that page directory is valid
597 *
598 * @rdev: radeon_device pointer
599 * @vm: requested vm
600 * @start: start of GPU address range
601 * @end: end of GPU address range
602 *
603 * Allocates new page tables if necessary
604 * and updates the page directory (cayman+).
605 * Returns 0 for success, error for failure.
606 *
607 * Global and local mutex must be locked!
608 */
609static int radeon_vm_update_pdes(struct radeon_device *rdev,
610 struct radeon_vm *vm,
611 struct radeon_ib *ib,
612 uint64_t start, uint64_t end)
613{
614 static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
615
616 uint64_t last_pde = ~0, last_pt = ~0;
617 unsigned count = 0;
618 uint64_t pt_idx;
619 int r;
620
621 start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
622 end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
623
624 /* walk over the address space and update the page directory */
625 for (pt_idx = start; pt_idx <= end; ++pt_idx) {
626 uint64_t pde, pt;
627
628 if (vm->page_tables[pt_idx])
629 continue;
630
631retry:
632 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
633 &vm->page_tables[pt_idx],
634 RADEON_VM_PTE_COUNT * 8,
635 RADEON_GPU_PAGE_SIZE, false);
636
637 if (r == -ENOMEM) {
638 r = radeon_vm_evict(rdev, vm);
639 if (r)
640 return r;
641 goto retry;
642 } else if (r) {
643 return r;
644 }
645
646 pde = vm->pd_gpu_addr + pt_idx * 8;
647
648 pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
649
650 if (((last_pde + 8 * count) != pde) ||
651 ((last_pt + incr * count) != pt)) {
652
653 if (count) {
654 radeon_asic_vm_set_page(rdev, ib, last_pde,
655 last_pt, count, incr,
656 R600_PTE_VALID);
657
658 count *= RADEON_VM_PTE_COUNT;
659 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
660 count, 0, 0);
661 }
662
663 count = 1;
664 last_pde = pde;
665 last_pt = pt;
666 } else {
667 ++count;
668 }
669 }
670
671 if (count) {
672 radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
673 incr, R600_PTE_VALID);
674
675 count *= RADEON_VM_PTE_COUNT;
676 radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
677 count, 0, 0);
678 }
679
680 return 0;
681}
682
683/**
684 * radeon_vm_update_ptes - make sure that page tables are valid
685 *
686 * @rdev: radeon_device pointer
687 * @vm: requested vm
688 * @start: start of GPU address range
689 * @end: end of GPU address range
690 * @dst: destination address to map to
691 * @flags: mapping flags
692 *
693 * Update the page tables in the range @start - @end (cayman+).
694 *
695 * Global and local mutex must be locked!
696 */
697static void radeon_vm_update_ptes(struct radeon_device *rdev,
698 struct radeon_vm *vm,
699 struct radeon_ib *ib,
700 uint64_t start, uint64_t end,
701 uint64_t dst, uint32_t flags)
702{
703 static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
704
705 uint64_t last_pte = ~0, last_dst = ~0;
706 unsigned count = 0;
707 uint64_t addr;
708
709 start = start / RADEON_GPU_PAGE_SIZE;
710 end = end / RADEON_GPU_PAGE_SIZE;
711
712 /* walk over the address space and update the page tables */
713 for (addr = start; addr < end; ) {
714 uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
715 unsigned nptes;
716 uint64_t pte;
717
718 if ((addr & ~mask) == (end & ~mask))
719 nptes = end - addr;
720 else
721 nptes = RADEON_VM_PTE_COUNT - (addr & mask);
722
723 pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
724 pte += (addr & mask) * 8;
725
726 if ((last_pte + 8 * count) != pte) {
727
728 if (count) {
729 radeon_asic_vm_set_page(rdev, ib, last_pte,
730 last_dst, count,
731 RADEON_GPU_PAGE_SIZE,
732 flags);
733 }
734
735 count = nptes;
736 last_pte = pte;
737 last_dst = dst;
738 } else {
739 count += nptes;
740 }
741
742 addr += nptes;
743 dst += nptes * RADEON_GPU_PAGE_SIZE;
744 }
745
746 if (count) {
747 radeon_asic_vm_set_page(rdev, ib, last_pte,
748 last_dst, count,
749 RADEON_GPU_PAGE_SIZE, flags);
750 }
751}
752
753/**
754 * radeon_vm_bo_update - map a bo into the vm page table
755 *
756 * @rdev: radeon_device pointer
757 * @vm: requested vm
758 * @bo: radeon buffer object
759 * @mem: ttm mem
760 *
761 * Fill in the page table entries for @bo (cayman+).
762 * Returns 0 for success, -EINVAL for failure.
763 *
764 * Object has to be reserved & global and local mutex must be locked!
765 */
766int radeon_vm_bo_update(struct radeon_device *rdev,
767 struct radeon_vm *vm,
768 struct radeon_bo *bo,
769 struct ttm_mem_reg *mem)
770{
771 struct radeon_ib ib;
772 struct radeon_bo_va *bo_va;
773 unsigned nptes, npdes, ndw;
774 uint64_t addr;
775 int r;
776
777 /* nothing to do if vm isn't bound */
778 if (vm->page_directory == NULL)
779 return 0;
780
781 bo_va = radeon_vm_bo_find(vm, bo);
782 if (bo_va == NULL) {
783 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
784 return -EINVAL;
785 }
786
787 if (!bo_va->soffset) {
788 		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
789 bo, vm);
790 return -EINVAL;
791 }
792
793 if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
794 return 0;
795
796 bo_va->flags &= ~RADEON_VM_PAGE_VALID;
797 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
798 if (mem) {
799 addr = mem->start << PAGE_SHIFT;
800 if (mem->mem_type != TTM_PL_SYSTEM) {
801 bo_va->flags |= RADEON_VM_PAGE_VALID;
802 bo_va->valid = true;
803 }
804 if (mem->mem_type == TTM_PL_TT) {
805 bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
806 } else {
807 addr += rdev->vm_manager.vram_base_offset;
808 }
809 } else {
810 addr = 0;
811 bo_va->valid = false;
812 }
813
814 trace_radeon_vm_bo_update(bo_va);
815
816 nptes = radeon_bo_ngpu_pages(bo);
817
818 /* assume two extra pdes in case the mapping overlaps the borders */
819 npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
820
821 /* padding, etc. */
822 ndw = 64;
823
824 if (RADEON_VM_BLOCK_SIZE > 11)
825 /* reserve space for one header for every 2k dwords */
826 ndw += (nptes >> 11) * 4;
827 else
828 /* reserve space for one header for
829 every (1 << BLOCK_SIZE) entries */
830 ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
831
832 /* reserve space for pte addresses */
833 ndw += nptes * 2;
834
835 /* reserve space for one header for every 2k dwords */
836 ndw += (npdes >> 11) * 4;
837
838 /* reserve space for pde addresses */
839 ndw += npdes * 2;
840
841 /* reserve space for clearing new page tables */
842 ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
843
844 /* update too big for an IB */
845 if (ndw > 0xfffff)
846 return -ENOMEM;
847
848 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
849 if (r)
850 return r;
851 ib.length_dw = 0;
852
853 r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
854 if (r) {
855 radeon_ib_free(rdev, &ib);
856 return r;
857 }
858
859 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
860 addr, radeon_vm_page_flags(bo_va->flags));
861
862 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
863 r = radeon_ib_schedule(rdev, &ib, NULL);
864 if (r) {
865 radeon_ib_free(rdev, &ib);
866 return r;
867 }
868 radeon_fence_unref(&vm->fence);
869 vm->fence = radeon_fence_ref(ib.fence);
870 radeon_ib_free(rdev, &ib);
871 radeon_fence_unref(&vm->last_flush);
872
873 return 0;
874}
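/*
 * To make the ndw budget in radeon_vm_bo_update() concrete, a sketch for
 * a 1 MiB buffer, assuming RADEON_GPU_PAGE_SIZE = 4096,
 * RADEON_VM_BLOCK_SIZE = 9 and RADEON_VM_PTE_COUNT = 512 (values not
 * spelled out in this file):
 *
 *   nptes = 1 MiB / 4096        = 256
 *   npdes = (256 >> 9) + 2      = 2
 *   ndw   = 64                          padding
 *         + (256 >> 9) * 4      = 0     pte write headers
 *         + 256 * 2             = 512   pte addresses
 *         + (2 >> 11) * 4       = 0     pde write headers
 *         + 2 * 2               = 4     pde addresses
 *         + 2 * 2 * 512         = 2048  clearing new page tables
 *         = 2628 dwords, i.e. a 10512 byte IB (ndw * 4).
 */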
875
876/**
877 * radeon_vm_bo_rmv - remove a bo from a specific vm
878 *
879 * @rdev: radeon_device pointer
880 * @bo_va: requested bo_va
881 *
882 * Remove @bo_va->bo from the requested vm (cayman+).
883 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
884 * remove the ptes for @bo_va in the page table.
885 * Returns 0 for success.
886 *
887 * Object has to be reserved!
888 */
889int radeon_vm_bo_rmv(struct radeon_device *rdev,
890 struct radeon_bo_va *bo_va)
891{
892 int r = 0;
893
894 mutex_lock(&rdev->vm_manager.lock);
895 mutex_lock(&bo_va->vm->mutex);
896 if (bo_va->soffset) {
897 r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
898 }
899 mutex_unlock(&rdev->vm_manager.lock);
900 list_del(&bo_va->vm_list);
901 mutex_unlock(&bo_va->vm->mutex);
902 list_del(&bo_va->bo_list);
903
904 kfree(bo_va);
905 return r;
906}
907
908/**
909 * radeon_vm_bo_invalidate - mark the bo as invalid
910 *
911 * @rdev: radeon_device pointer
912 * @vm: requested vm
913 * @bo: radeon buffer object
914 *
915 * Mark @bo as invalid (cayman+).
916 */
917void radeon_vm_bo_invalidate(struct radeon_device *rdev,
918 struct radeon_bo *bo)
919{
920 struct radeon_bo_va *bo_va;
921
922 list_for_each_entry(bo_va, &bo->va, bo_list) {
923 bo_va->valid = false;
924 }
925}
926
927/**
928 * radeon_vm_init - initialize a vm instance
929 *
930 * @rdev: radeon_device pointer
931 * @vm: requested vm
932 *
933 * Init @vm fields (cayman+).
934 */
935void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
936{
937 vm->id = 0;
938 vm->fence = NULL;
939 vm->last_flush = NULL;
940 vm->last_id_use = NULL;
941 mutex_init(&vm->mutex);
942 INIT_LIST_HEAD(&vm->list);
943 INIT_LIST_HEAD(&vm->va);
944}
945
946/**
947 * radeon_vm_fini - tear down a vm instance
948 *
949 * @rdev: radeon_device pointer
950 * @vm: requested vm
951 *
952 * Tear down @vm (cayman+).
953 * Unbind the VM and remove all bos from the vm bo list
954 */
955void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
956{
957 struct radeon_bo_va *bo_va, *tmp;
958 int r;
959
960 mutex_lock(&rdev->vm_manager.lock);
961 mutex_lock(&vm->mutex);
962 radeon_vm_free_pt(rdev, vm);
963 mutex_unlock(&rdev->vm_manager.lock);
964
965 if (!list_empty(&vm->va)) {
966 dev_err(rdev->dev, "still active bo inside vm\n");
967 }
968 list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
969 list_del_init(&bo_va->vm_list);
970 r = radeon_bo_reserve(bo_va->bo, false);
971 if (!r) {
972 list_del_init(&bo_va->bo_list);
973 radeon_bo_unreserve(bo_va->bo);
974 kfree(bo_va);
975 }
976 }
977 radeon_fence_unref(&vm->fence);
978 radeon_fence_unref(&vm->last_flush);
979 radeon_fence_unref(&vm->last_id_use);
980 mutex_unlock(&vm->mutex);
981}
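
For context, a minimal sketch of how the interfaces collected in radeon_vm.c fit together for a single buffer object, mirroring the lock order used by radeon_vm_bo_rmv() above. The helper name example_map_bo, the choice of mapping flags, and the use of bo->tbo.mem for the current placement are illustrative assumptions, not code from this commit:

#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"

/*
 * Hypothetical helper: give one buffer object a mapping at vaddr inside vm.
 * Error unwinding of the bo_va on failure is omitted for brevity.
 */
static int example_map_bo(struct radeon_device *rdev, struct radeon_vm *vm,
			  struct radeon_bo *bo, uint64_t vaddr)
{
	struct radeon_bo_va *bo_va;
	int r;

	/* radeon_vm_bo_add()/_set_addr() require the object to be reserved */
	r = radeon_bo_reserve(bo, false);
	if (r)
		return r;

	/* track the bo in this vm and pick its virtual address */
	bo_va = radeon_vm_bo_add(rdev, vm, bo);
	if (bo_va == NULL) {
		radeon_bo_unreserve(bo);
		return -ENOMEM;
	}

	r = radeon_vm_bo_set_addr(rdev, bo_va, vaddr,
				  RADEON_VM_PAGE_READABLE |
				  RADEON_VM_PAGE_WRITEABLE);
	if (r)
		goto out;

	/* write the page table entries; same lock order as radeon_vm_bo_rmv() */
	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);

out:
	radeon_bo_unreserve(bo);
	return r;
}

In the driver itself this sequence is not a single helper; it is split between the GEM VA ioctl path (bo_add/set_addr) and the command submission path, which also calls radeon_vm_alloc_pt() and radeon_vm_add_to_lru() under the same two locks.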