Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_gart.c')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_gart.c | 386
1 file changed, 276 insertions, 110 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index f0c06d196b75..4debd60e5aa6 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -355,14 +355,13 @@ int radeon_gart_init(struct radeon_device *rdev) | |||
355 | DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", | 355 | DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", |
356 | rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages); | 356 | rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages); |
357 | /* Allocate pages table */ | 357 | /* Allocate pages table */ |
358 | rdev->gart.pages = kzalloc(sizeof(void *) * rdev->gart.num_cpu_pages, | 358 | rdev->gart.pages = vzalloc(sizeof(void *) * rdev->gart.num_cpu_pages); |
359 | GFP_KERNEL); | ||
360 | if (rdev->gart.pages == NULL) { | 359 | if (rdev->gart.pages == NULL) { |
361 | radeon_gart_fini(rdev); | 360 | radeon_gart_fini(rdev); |
362 | return -ENOMEM; | 361 | return -ENOMEM; |
363 | } | 362 | } |
364 | rdev->gart.pages_addr = kzalloc(sizeof(dma_addr_t) * | 363 | rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) * |
365 | rdev->gart.num_cpu_pages, GFP_KERNEL); | 364 | rdev->gart.num_cpu_pages); |
366 | if (rdev->gart.pages_addr == NULL) { | 365 | if (rdev->gart.pages_addr == NULL) { |
367 | radeon_gart_fini(rdev); | 366 | radeon_gart_fini(rdev); |
368 | return -ENOMEM; | 367 | return -ENOMEM; |
@@ -388,8 +387,8 @@ void radeon_gart_fini(struct radeon_device *rdev) | |||
388 | radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages); | 387 | radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages); |
389 | } | 388 | } |
390 | rdev->gart.ready = false; | 389 | rdev->gart.ready = false; |
391 | kfree(rdev->gart.pages); | 390 | vfree(rdev->gart.pages); |
392 | kfree(rdev->gart.pages_addr); | 391 | vfree(rdev->gart.pages_addr); |
393 | rdev->gart.pages = NULL; | 392 | rdev->gart.pages = NULL; |
394 | rdev->gart.pages_addr = NULL; | 393 | rdev->gart.pages_addr = NULL; |
395 | 394 | ||
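The two hunks above replace kzalloc/kfree with vzalloc/vfree because the page and DMA-address tables grow linearly with the GART size, and a large physically contiguous allocation can fail even when plenty of memory is free. A minimal sketch of the pattern; the helper names and the num_cpu_pages parameter are illustrative, not driver symbols:

#include <linux/vmalloc.h>

/* Sketch only: mirrors the kzalloc -> vzalloc change above. */
static void **example_alloc_page_table(unsigned num_cpu_pages)
{
	/* vzalloc builds the table from individual pages, so it does not
	 * need a large physically contiguous region */
	return vzalloc(sizeof(void *) * num_cpu_pages);
}

static void example_free_page_table(void **pages)
{
	vfree(pages);	/* vzalloc memory must be released with vfree */
}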
@@ -423,6 +422,18 @@ void radeon_gart_fini(struct radeon_device *rdev) | |||
423 | */ | 422 | */ |
424 | 423 | ||
425 | /** | 424 | /** |
425 | * radeon_vm_num_pdes - return the number of page directory entries ||
426 | * | ||
427 | * @rdev: radeon_device pointer | ||
428 | * | ||
429 | * Calculate the number of page directory entries (cayman+). | ||
430 | */ | ||
431 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) | ||
432 | { | ||
433 | return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; | ||
434 | } | ||
435 | |||
436 | /** | ||
426 | * radeon_vm_directory_size - returns the size of the page directory in bytes | 437 | * radeon_vm_directory_size - returns the size of the page directory in bytes |
427 | * | 438 | * |
428 | * @rdev: radeon_device pointer | 439 | * @rdev: radeon_device pointer |
@@ -431,7 +442,7 @@ void radeon_gart_fini(struct radeon_device *rdev) | |||
431 | */ | 442 | */ |
432 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) | 443 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) |
433 | { | 444 | { |
434 | return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8; | 445 | return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); |
435 | } | 446 | } |
436 | 447 | ||
437 | /** | 448 | /** |
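The new radeon_vm_num_pdes() helper and the reworked radeon_vm_directory_size() are easiest to read with concrete numbers. The constants below are assumptions for illustration (block size 9, i.e. 512 PTEs per page table, and a 1 GiB address space of 4 KiB GPU pages); the real RADEON_VM_BLOCK_SIZE and max_pfn come from the driver headers and chip setup:

#include <linux/kernel.h>	/* ALIGN() */

/* Worked example with assumed constants; not the driver's actual values. */
#define EXAMPLE_BLOCK_SIZE	9	/* 512 PTEs per page table (assumed) */
#define EXAMPLE_GPU_PAGE_SIZE	4096

static unsigned example_num_pdes(unsigned max_pfn)
{
	/* 1 GiB / 4 KiB = 262144 pfns -> 262144 >> 9 = 512 PDEs */
	return max_pfn >> EXAMPLE_BLOCK_SIZE;
}

static unsigned example_directory_size(unsigned max_pfn)
{
	/* 512 PDEs * 8 bytes = 4096 bytes; aligning to the GPU page size is
	 * a no-op for this example but matters for other configurations */
	return ALIGN(example_num_pdes(max_pfn) * 8, EXAMPLE_GPU_PAGE_SIZE);
}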
@@ -451,11 +462,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev) | |||
451 | 462 | ||
452 | if (!rdev->vm_manager.enabled) { | 463 | if (!rdev->vm_manager.enabled) { |
453 | /* allocate enough for 2 full VM pts */ | 464 | /* allocate enough for 2 full VM pts */ |
454 | size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); | 465 | size = radeon_vm_directory_size(rdev); |
455 | size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8); | 466 | size += rdev->vm_manager.max_pfn * 8; |
456 | size *= 2; | 467 | size *= 2; |
457 | r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, | 468 | r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, |
458 | size, | 469 | RADEON_GPU_PAGE_ALIGN(size), |
459 | RADEON_GEM_DOMAIN_VRAM); | 470 | RADEON_GEM_DOMAIN_VRAM); |
460 | if (r) { | 471 | if (r) { |
461 | dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", | 472 | dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", |
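Continuing that example, the suballocator backing store is now sized from the aligned directory plus a worst case of fully populated page tables (8 bytes per pfn), doubled so two complete VM layouts fit, and only the final sum is page aligned before radeon_sa_bo_manager_init() is called. A sketch with the same assumed numbers:

/* Assumed numbers carried over from the previous sketch (max_pfn = 262144). */
static unsigned example_vm_manager_size(unsigned max_pfn)
{
	unsigned size = example_directory_size(max_pfn);	/* 4096 bytes      */

	size += max_pfn * 8;			/* worst-case PTEs: 2 MiB          */
	size *= 2;				/* room for two full VM page tables */
	return ALIGN(size, EXAMPLE_GPU_PAGE_SIZE);	/* aligned once, at the end */
}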
@@ -476,7 +487,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) | |||
476 | 487 | ||
477 | /* restore page table */ | 488 | /* restore page table */ |
478 | list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { | 489 | list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { |
479 | if (vm->sa_bo == NULL) | 490 | if (vm->page_directory == NULL) |
480 | continue; | 491 | continue; |
481 | 492 | ||
482 | list_for_each_entry(bo_va, &vm->va, vm_list) { | 493 | list_for_each_entry(bo_va, &vm->va, vm_list) { |
@@ -500,16 +511,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, | |||
500 | struct radeon_vm *vm) | 511 | struct radeon_vm *vm) |
501 | { | 512 | { |
502 | struct radeon_bo_va *bo_va; | 513 | struct radeon_bo_va *bo_va; |
514 | int i; | ||
503 | 515 | ||
504 | if (!vm->sa_bo) | 516 | if (!vm->page_directory) |
505 | return; | 517 | return; |
506 | 518 | ||
507 | list_del_init(&vm->list); | 519 | list_del_init(&vm->list); |
508 | radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence); | 520 | radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); |
509 | 521 | ||
510 | list_for_each_entry(bo_va, &vm->va, vm_list) { | 522 | list_for_each_entry(bo_va, &vm->va, vm_list) { |
511 | bo_va->valid = false; | 523 | bo_va->valid = false; |
512 | } | 524 | } |
525 | |||
526 | if (vm->page_tables == NULL) | ||
527 | return; | ||
528 | |||
529 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) | ||
530 | radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); | ||
531 | |||
532 | kfree(vm->page_tables); | ||
513 | } | 533 | } |
514 | 534 | ||
515 | /** | 535 | /** |
@@ -546,63 +566,106 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) | |||
546 | } | 566 | } |
547 | 567 | ||
548 | /** | 568 | /** |
569 | * radeon_vm_evict - evict page table to make room for new one | ||
570 | * | ||
571 | * @rdev: radeon_device pointer | ||
572 | * @vm: VM we want to allocate something for | ||
573 | * | ||
574 | * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). | ||
575 | * Returns 0 for success, -ENOMEM for failure. | ||
576 | * | ||
577 | * Global and local mutex must be locked! | ||
578 | */ | ||
579 | static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) | ||
580 | { | ||
581 | struct radeon_vm *vm_evict; | ||
582 | |||
583 | if (list_empty(&rdev->vm_manager.lru_vm)) | ||
584 | return -ENOMEM; | ||
585 | |||
586 | vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, | ||
587 | struct radeon_vm, list); | ||
588 | if (vm_evict == vm) | ||
589 | return -ENOMEM; | ||
590 | |||
591 | mutex_lock(&vm_evict->mutex); | ||
592 | radeon_vm_free_pt(rdev, vm_evict); | ||
593 | mutex_unlock(&vm_evict->mutex); | ||
594 | return 0; | ||
595 | } | ||
596 | |||
597 | /** | ||
549 | * radeon_vm_alloc_pt - allocates a page table for a VM | 598 | * radeon_vm_alloc_pt - allocates a page table for a VM |
550 | * | 599 | * |
551 | * @rdev: radeon_device pointer | 600 | * @rdev: radeon_device pointer |
552 | * @vm: vm to bind | 601 | * @vm: vm to bind |
553 | * | 602 | * |
554 | * Allocate a page table for the requested vm (cayman+). | 603 | * Allocate a page table for the requested vm (cayman+). |
555 | * Also starts to populate the page table. | ||
556 | * Returns 0 for success, error for failure. | 604 | * Returns 0 for success, error for failure. |
557 | * | 605 | * |
558 | * Global and local mutex must be locked! | 606 | * Global and local mutex must be locked! |
559 | */ | 607 | */ |
560 | int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) | 608 | int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) |
561 | { | 609 | { |
562 | struct radeon_vm *vm_evict; | 610 | unsigned pd_size, pts_size; |
563 | int r; | ||
564 | u64 *pd_addr; | 611 | u64 *pd_addr; |
565 | int tables_size; | 612 | int r; |
566 | 613 | ||
567 | if (vm == NULL) { | 614 | if (vm == NULL) { |
568 | return -EINVAL; | 615 | return -EINVAL; |
569 | } | 616 | } |
570 | 617 | ||
571 | /* allocate enough to cover the current VM size */ | 618 | if (vm->page_directory != NULL) { |
572 | tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); | ||
573 | tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8); | ||
574 | |||
575 | if (vm->sa_bo != NULL) { | ||
576 | /* update lru */ | ||
577 | list_del_init(&vm->list); | ||
578 | list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); | ||
579 | return 0; | 619 | return 0; |
580 | } | 620 | } |
581 | 621 | ||
582 | retry: | 622 | retry: |
583 | r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, | 623 | pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); |
584 | tables_size, RADEON_GPU_PAGE_SIZE, false); | 624 | r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, |
625 | &vm->page_directory, pd_size, | ||
626 | RADEON_GPU_PAGE_SIZE, false); | ||
585 | if (r == -ENOMEM) { | 627 | if (r == -ENOMEM) { |
586 | if (list_empty(&rdev->vm_manager.lru_vm)) { | 628 | r = radeon_vm_evict(rdev, vm); |
629 | if (r) | ||
587 | return r; | 630 | return r; |
588 | } | ||
589 | vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); | ||
590 | mutex_lock(&vm_evict->mutex); | ||
591 | radeon_vm_free_pt(rdev, vm_evict); | ||
592 | mutex_unlock(&vm_evict->mutex); | ||
593 | goto retry; | 631 | goto retry; |
594 | 632 | ||
595 | } else if (r) { | 633 | } else if (r) { |
596 | return r; | 634 | return r; |
597 | } | 635 | } |
598 | 636 | ||
599 | pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo); | 637 | vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); |
600 | vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo); | 638 | |
601 | memset(pd_addr, 0, tables_size); | 639 | /* Initially clear the page directory */ |
640 | pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory); | ||
641 | memset(pd_addr, 0, pd_size); | ||
642 | |||
643 | pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); | ||
644 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); | ||
645 | |||
646 | if (vm->page_tables == NULL) { | ||
647 | DRM_ERROR("Cannot allocate memory for page table array\n"); | ||
648 | radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); | ||
649 | return -ENOMEM; | ||
650 | } | ||
651 | |||
652 | return 0; | ||
653 | } | ||
602 | 654 | ||
655 | /** | ||
656 | * radeon_vm_add_to_lru - add the VM's page table to the LRU list ||
657 | * | ||
658 | * @rdev: radeon_device pointer | ||
659 | * @vm: vm to add to LRU | ||
660 | * | ||
661 | * Add the allocated page table to the LRU list (cayman+). | ||
662 | * | ||
663 | * Global mutex must be locked! | ||
664 | */ | ||
665 | void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) | ||
666 | { | ||
667 | list_del_init(&vm->list); | ||
603 | list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); | 668 | list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); |
604 | return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, | ||
605 | &rdev->ring_tmp_bo.bo->tbo.mem); | ||
606 | } | 669 | } |
607 | 670 | ||
608 | /** | 671 | /** |
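With radeon_vm_alloc_pt() reduced to allocating just the page directory, eviction split out into radeon_vm_evict(), and LRU maintenance moved to radeon_vm_add_to_lru(), binding a VM becomes a short sequence driven by callers outside this file. A rough sketch of such a caller under the locking rules stated in the kernel-doc above; the function itself is hypothetical:

/* Hypothetical caller; the real call sites are in the command-submission
 * path, so this only illustrates the intended call order and locking.
 */
static int example_bind_vm(struct radeon_device *rdev, struct radeon_vm *vm)
{
	int r;

	mutex_lock(&rdev->vm_manager.lock);	/* global mutex */
	mutex_lock(&vm->mutex);			/* local mutex  */

	r = radeon_vm_alloc_pt(rdev, vm);	/* may evict other VMs on -ENOMEM */
	if (!r)
		radeon_vm_add_to_lru(rdev, vm);	/* mark it most recently used */

	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);
	return r;
}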
@@ -793,20 +856,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, | |||
793 | } | 856 | } |
794 | 857 | ||
795 | mutex_lock(&vm->mutex); | 858 | mutex_lock(&vm->mutex); |
796 | if (last_pfn > vm->last_pfn) { | ||
797 | /* release mutex and lock in right order */ | ||
798 | mutex_unlock(&vm->mutex); | ||
799 | mutex_lock(&rdev->vm_manager.lock); | ||
800 | mutex_lock(&vm->mutex); | ||
801 | /* and check again */ | ||
802 | if (last_pfn > vm->last_pfn) { | ||
803 | /* grow va space 32M by 32M */ | ||
804 | unsigned align = ((32 << 20) >> 12) - 1; | ||
805 | radeon_vm_free_pt(rdev, vm); | ||
806 | vm->last_pfn = (last_pfn + align) & ~align; | ||
807 | } | ||
808 | mutex_unlock(&rdev->vm_manager.lock); | ||
809 | } | ||
810 | head = &vm->va; | 859 | head = &vm->va; |
811 | last_offset = 0; | 860 | last_offset = 0; |
812 | list_for_each_entry(tmp, &vm->va, vm_list) { | 861 | list_for_each_entry(tmp, &vm->va, vm_list) { |
@@ -865,6 +914,154 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) | |||
865 | } | 914 | } |
866 | 915 | ||
867 | /** | 916 | /** |
917 | * radeon_vm_update_pdes - make sure that page directory is valid | ||
918 | * | ||
919 | * @rdev: radeon_device pointer | ||
920 | * @vm: requested vm | ||
921 | * @start: start of GPU address range | ||
922 | * @end: end of GPU address range | ||
923 | * | ||
924 | * Allocates new page tables if necessary | ||
925 | * and updates the page directory (cayman+). | ||
926 | * Returns 0 for success, error for failure. | ||
927 | * | ||
928 | * Global and local mutex must be locked! | ||
929 | */ | ||
930 | static int radeon_vm_update_pdes(struct radeon_device *rdev, | ||
931 | struct radeon_vm *vm, | ||
932 | uint64_t start, uint64_t end) | ||
933 | { | ||
934 | static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; | ||
935 | |||
936 | uint64_t last_pde = ~0, last_pt = ~0; | ||
937 | unsigned count = 0; | ||
938 | uint64_t pt_idx; | ||
939 | int r; | ||
940 | |||
941 | start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; | ||
942 | end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; | ||
943 | |||
944 | /* walk over the address space and update the page directory */ | ||
945 | for (pt_idx = start; pt_idx <= end; ++pt_idx) { | ||
946 | uint64_t pde, pt; | ||
947 | |||
948 | if (vm->page_tables[pt_idx]) | ||
949 | continue; | ||
950 | |||
951 | retry: | ||
952 | r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, | ||
953 | &vm->page_tables[pt_idx], | ||
954 | RADEON_VM_PTE_COUNT * 8, | ||
955 | RADEON_GPU_PAGE_SIZE, false); | ||
956 | |||
957 | if (r == -ENOMEM) { | ||
958 | r = radeon_vm_evict(rdev, vm); | ||
959 | if (r) | ||
960 | return r; | ||
961 | goto retry; | ||
962 | } else if (r) { | ||
963 | return r; | ||
964 | } | ||
965 | |||
966 | pde = vm->pd_gpu_addr + pt_idx * 8; | ||
967 | |||
968 | pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); | ||
969 | |||
970 | if (((last_pde + 8 * count) != pde) || | ||
971 | ((last_pt + incr * count) != pt)) { | ||
972 | |||
973 | if (count) { | ||
974 | radeon_asic_vm_set_page(rdev, last_pde, | ||
975 | last_pt, count, incr, | ||
976 | RADEON_VM_PAGE_VALID); | ||
977 | } | ||
978 | |||
979 | count = 1; | ||
980 | last_pde = pde; | ||
981 | last_pt = pt; | ||
982 | } else { | ||
983 | ++count; | ||
984 | } | ||
985 | } | ||
986 | |||
987 | if (count) { | ||
988 | radeon_asic_vm_set_page(rdev, last_pde, last_pt, count, | ||
989 | incr, RADEON_VM_PAGE_VALID); | ||
990 | |||
991 | } | ||
992 | |||
993 | return 0; | ||
994 | } | ||
995 | |||
996 | /** | ||
997 | * radeon_vm_update_ptes - make sure that page tables are valid | ||
998 | * | ||
999 | * @rdev: radeon_device pointer | ||
1000 | * @vm: requested vm | ||
1001 | * @start: start of GPU address range | ||
1002 | * @end: end of GPU address range | ||
1003 | * @dst: destination address to map to | ||
1004 | * @flags: mapping flags | ||
1005 | * | ||
1006 | * Update the page tables in the range @start - @end (cayman+). | ||
1007 | * | ||
1008 | * Global and local mutex must be locked! | ||
1009 | */ | ||
1010 | static void radeon_vm_update_ptes(struct radeon_device *rdev, | ||
1011 | struct radeon_vm *vm, | ||
1012 | uint64_t start, uint64_t end, | ||
1013 | uint64_t dst, uint32_t flags) | ||
1014 | { | ||
1015 | static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; | ||
1016 | |||
1017 | uint64_t last_pte = ~0, last_dst = ~0; | ||
1018 | unsigned count = 0; | ||
1019 | uint64_t addr; | ||
1020 | |||
1021 | start = start / RADEON_GPU_PAGE_SIZE; | ||
1022 | end = end / RADEON_GPU_PAGE_SIZE; | ||
1023 | |||
1024 | /* walk over the address space and update the page tables */ | ||
1025 | for (addr = start; addr < end; ) { | ||
1026 | uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; | ||
1027 | unsigned nptes; | ||
1028 | uint64_t pte; | ||
1029 | |||
1030 | if ((addr & ~mask) == (end & ~mask)) | ||
1031 | nptes = end - addr; | ||
1032 | else | ||
1033 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); | ||
1034 | |||
1035 | pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); | ||
1036 | pte += (addr & mask) * 8; | ||
1037 | |||
1038 | if ((last_pte + 8 * count) != pte) { | ||
1039 | |||
1040 | if (count) { | ||
1041 | radeon_asic_vm_set_page(rdev, last_pte, | ||
1042 | last_dst, count, | ||
1043 | RADEON_GPU_PAGE_SIZE, | ||
1044 | flags); | ||
1045 | } | ||
1046 | |||
1047 | count = nptes; | ||
1048 | last_pte = pte; | ||
1049 | last_dst = dst; | ||
1050 | } else { | ||
1051 | count += nptes; | ||
1052 | } | ||
1053 | |||
1054 | addr += nptes; | ||
1055 | dst += nptes * RADEON_GPU_PAGE_SIZE; | ||
1056 | } | ||
1057 | |||
1058 | if (count) { | ||
1059 | radeon_asic_vm_set_page(rdev, last_pte, last_dst, count, | ||
1060 | RADEON_GPU_PAGE_SIZE, flags); | ||
1061 | } | ||
1062 | } | ||
1063 | |||
1064 | /** | ||
868 | * radeon_vm_bo_update_pte - map a bo into the vm page table | 1065 | * radeon_vm_bo_update_pte - map a bo into the vm page table |
869 | * | 1066 | * |
870 | * @rdev: radeon_device pointer | 1067 | * @rdev: radeon_device pointer |
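Both radeon_vm_update_pdes() and radeon_vm_update_ptes() use the same trick: consecutive entries whose GPU addresses advance by a fixed stride are collected into one run and flushed with a single radeon_asic_vm_set_page() call. The standalone sketch below shows just that coalescing loop; the address array, stride and flush callback are illustrative stand-ins:

#include <linux/types.h>

/* Illustrative callback type standing in for radeon_asic_vm_set_page(). */
typedef void (*example_flush_fn)(uint64_t first, unsigned count);

static void example_coalesce_runs(const uint64_t *addr, unsigned n,
				  uint64_t stride, example_flush_fn flush)
{
	uint64_t last = ~0ULL;
	unsigned count = 0, i;

	for (i = 0; i < n; i++) {
		/* does addr[i] extend the current run? */
		if (last + stride * count != addr[i]) {
			if (count)
				flush(last, count);	/* emit the finished run */
			count = 1;
			last = addr[i];
		} else {
			++count;
		}
	}

	if (count)
		flush(last, count);			/* emit the final run */
}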
@@ -887,12 +1084,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, | |||
887 | struct radeon_semaphore *sem = NULL; | 1084 | struct radeon_semaphore *sem = NULL; |
888 | struct radeon_bo_va *bo_va; | 1085 | struct radeon_bo_va *bo_va; |
889 | unsigned nptes, npdes, ndw; | 1086 | unsigned nptes, npdes, ndw; |
890 | uint64_t pe, addr; | 1087 | uint64_t addr; |
891 | uint64_t pfn; | ||
892 | int r; | 1088 | int r; |
893 | 1089 | ||
894 | /* nothing to do if vm isn't bound */ | 1090 | /* nothing to do if vm isn't bound */ |
895 | if (vm->sa_bo == NULL) | 1091 | if (vm->page_directory == NULL) |
896 | return 0; | 1092 | return 0; |
897 | 1093 | ||
898 | bo_va = radeon_vm_bo_find(vm, bo); | 1094 | bo_va = radeon_vm_bo_find(vm, bo); |
@@ -939,25 +1135,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, | |||
939 | } | 1135 | } |
940 | } | 1136 | } |
941 | 1137 | ||
942 | /* estimate number of dw needed */ | ||
943 | /* reserve space for 32-bit padding */ | ||
944 | ndw = 32; | ||
945 | |||
946 | nptes = radeon_bo_ngpu_pages(bo); | 1138 | nptes = radeon_bo_ngpu_pages(bo); |
947 | 1139 | ||
948 | pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE); | 1140 | /* assume two extra pdes in case the mapping overlaps the borders */ |
1141 | npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; | ||
949 | 1142 | ||
950 | /* handle cases where a bo spans several pdes */ | 1143 | /* estimate number of dw needed */ |
951 | npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) - | 1144 | /* semaphore, fence and padding */ |
952 | (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE; | 1145 | ndw = 32; |
1146 | |||
1147 | if (RADEON_VM_BLOCK_SIZE > 11) | ||
1148 | /* reserve space for one header for every 2k dwords */ | ||
1149 | ndw += (nptes >> 11) * 4; | ||
1150 | else | ||
1151 | /* reserve space for one header for | ||
1152 | every (1 << BLOCK_SIZE) entries */ | ||
1153 | ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; | ||
953 | 1154 | ||
954 | /* reserve space for one header for every 2k dwords */ | ||
955 | ndw += (nptes >> 11) * 3; | ||
956 | /* reserve space for pte addresses */ | 1155 | /* reserve space for pte addresses */ |
957 | ndw += nptes * 2; | 1156 | ndw += nptes * 2; |
958 | 1157 | ||
959 | /* reserve space for one header for every 2k dwords */ | 1158 | /* reserve space for one header for every 2k dwords */ |
960 | ndw += (npdes >> 11) * 3; | 1159 | ndw += (npdes >> 11) * 4; |
1160 | |||
961 | /* reserve space for pde addresses */ | 1161 | /* reserve space for pde addresses */ |
962 | ndw += npdes * 2; | 1162 | ndw += npdes * 2; |
963 | 1163 | ||
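The dword estimate above is easier to check with concrete numbers. Assuming, purely for illustration, a 4 MiB mapping (nptes = 1024) and a block size of 9, the arithmetic works out as follows:

/* Worked example with assumed values; RADEON_VM_BLOCK_SIZE is taken as 9. */
static unsigned example_ndw_estimate(void)
{
	const unsigned block_size = 9;			/* assumed                  */
	unsigned nptes = 1024;				/* 4 MiB of 4 KiB GPU pages */
	unsigned npdes = (nptes >> block_size) + 2;	/* 2 + 2 border PDEs = 4    */
	unsigned ndw = 32;				/* semaphore, fence, padding */

	ndw += (nptes >> block_size) * 4;	/* PTE write headers:  8    */
	ndw += nptes * 2;			/* PTE payload:        2048 */
	ndw += (npdes >> 11) * 4;		/* PDE write headers:  0    */
	ndw += npdes * 2;			/* PDE payload:        8    */

	return ndw;			/* 32 + 8 + 2048 + 0 + 8 = 2096 dwords */
}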
@@ -971,22 +1171,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, | |||
971 | radeon_fence_note_sync(vm->fence, ridx); | 1171 | radeon_fence_note_sync(vm->fence, ridx); |
972 | } | 1172 | } |
973 | 1173 | ||
974 | /* update page table entries */ | 1174 | r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset); |
975 | pe = vm->pd_gpu_addr; | 1175 | if (r) { |
976 | pe += radeon_vm_directory_size(rdev); | 1176 | radeon_ring_unlock_undo(rdev, ring); |
977 | pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8; | 1177 | return r; |
978 | 1178 | } | |
979 | radeon_asic_vm_set_page(rdev, pe, addr, nptes, | ||
980 | RADEON_GPU_PAGE_SIZE, bo_va->flags); | ||
981 | |||
982 | /* update page directory entries */ | ||
983 | addr = pe; | ||
984 | |||
985 | pe = vm->pd_gpu_addr; | ||
986 | pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8; | ||
987 | 1179 | ||
988 | radeon_asic_vm_set_page(rdev, pe, addr, npdes, | 1180 | radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset, |
989 | RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID); | 1181 | addr, bo_va->flags); |
990 | 1182 | ||
991 | radeon_fence_unref(&vm->fence); | 1183 | radeon_fence_unref(&vm->fence); |
992 | r = radeon_fence_emit(rdev, &vm->fence, ridx); | 1184 | r = radeon_fence_emit(rdev, &vm->fence, ridx); |
@@ -997,6 +1189,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, | |||
997 | radeon_ring_unlock_commit(rdev, ring); | 1189 | radeon_ring_unlock_commit(rdev, ring); |
998 | radeon_semaphore_free(rdev, &sem, vm->fence); | 1190 | radeon_semaphore_free(rdev, &sem, vm->fence); |
999 | radeon_fence_unref(&vm->last_flush); | 1191 | radeon_fence_unref(&vm->last_flush); |
1192 | |||
1000 | return 0; | 1193 | return 0; |
1001 | } | 1194 | } |
1002 | 1195 | ||
@@ -1056,31 +1249,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, | |||
1056 | * @rdev: radeon_device pointer | 1249 | * @rdev: radeon_device pointer |
1057 | * @vm: requested vm | 1250 | * @vm: requested vm |
1058 | * | 1251 | * |
1059 | * Init @vm (cayman+). | 1252 | * Init @vm fields (cayman+). |
1060 | * Map the IB pool and any other shared objects into the VM | ||
1061 | * by default as it's used by all VMs. | ||
1062 | * Returns 0 for success, error for failure. | ||
1063 | */ | 1253 | */ |
1064 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) | 1254 | void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
1065 | { | 1255 | { |
1066 | struct radeon_bo_va *bo_va; | ||
1067 | int r; | ||
1068 | |||
1069 | vm->id = 0; | 1256 | vm->id = 0; |
1070 | vm->fence = NULL; | 1257 | vm->fence = NULL; |
1071 | vm->last_pfn = 0; | ||
1072 | mutex_init(&vm->mutex); | 1258 | mutex_init(&vm->mutex); |
1073 | INIT_LIST_HEAD(&vm->list); | 1259 | INIT_LIST_HEAD(&vm->list); |
1074 | INIT_LIST_HEAD(&vm->va); | 1260 | INIT_LIST_HEAD(&vm->va); |
1075 | |||
1076 | /* map the ib pool buffer at 0 in virtual address space, set | ||
1077 | * read only | ||
1078 | */ | ||
1079 | bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo); | ||
1080 | r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, | ||
1081 | RADEON_VM_PAGE_READABLE | | ||
1082 | RADEON_VM_PAGE_SNOOPED); | ||
1083 | return r; | ||
1084 | } | 1261 | } |
1085 | 1262 | ||
1086 | /** | 1263 | /** |
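Since radeon_vm_init() now only initializes the VM fields and returns void, the IB pool mapping that used to live here has to be done by whoever creates the VM for a client. The sketch below simply repeats the removed mapping from the caller's side; the enclosing function is an assumption, not the driver's actual open path:

/* Hypothetical caller; the body mirrors the mapping that the old
 * radeon_vm_init() performed before this change.
 */
static int example_open_vm(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;

	radeon_vm_init(rdev, vm);	/* field initialization only now */

	/* map the IB pool read-only at the fixed IB offset */
	bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo);
	if (bo_va == NULL)
		return -ENOMEM;

	return radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
				     RADEON_VM_PAGE_READABLE |
				     RADEON_VM_PAGE_SNOOPED);
}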
@@ -1102,17 +1279,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) | |||
1102 | radeon_vm_free_pt(rdev, vm); | 1279 | radeon_vm_free_pt(rdev, vm); |
1103 | mutex_unlock(&rdev->vm_manager.lock); | 1280 | mutex_unlock(&rdev->vm_manager.lock); |
1104 | 1281 | ||
1105 | /* remove all bo at this point non are busy any more because unbind | ||
1106 | * waited for the last vm fence to signal | ||
1107 | */ | ||
1108 | r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); | ||
1109 | if (!r) { | ||
1110 | bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo); | ||
1111 | list_del_init(&bo_va->bo_list); | ||
1112 | list_del_init(&bo_va->vm_list); | ||
1113 | radeon_bo_unreserve(rdev->ring_tmp_bo.bo); | ||
1114 | kfree(bo_va); | ||
1115 | } | ||
1116 | if (!list_empty(&vm->va)) { | 1282 | if (!list_empty(&vm->va)) { |
1117 | dev_err(rdev->dev, "still active bo inside vm\n"); | 1283 | dev_err(rdev->dev, "still active bo inside vm\n"); |
1118 | } | 1284 | } |