Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_gart.c')
 -rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c  386
 1 file changed, 276 insertions(+), 110 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index f0c06d196b75..4debd60e5aa6 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -355,14 +355,13 @@ int radeon_gart_init(struct radeon_device *rdev)
         DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
                  rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
         /* Allocate pages table */
-        rdev->gart.pages = kzalloc(sizeof(void *) * rdev->gart.num_cpu_pages,
-                                   GFP_KERNEL);
+        rdev->gart.pages = vzalloc(sizeof(void *) * rdev->gart.num_cpu_pages);
         if (rdev->gart.pages == NULL) {
                 radeon_gart_fini(rdev);
                 return -ENOMEM;
         }
-        rdev->gart.pages_addr = kzalloc(sizeof(dma_addr_t) *
-                                        rdev->gart.num_cpu_pages, GFP_KERNEL);
+        rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
+                                        rdev->gart.num_cpu_pages);
         if (rdev->gart.pages_addr == NULL) {
                 radeon_gart_fini(rdev);
                 return -ENOMEM;
@@ -388,8 +387,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
                 radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
         }
         rdev->gart.ready = false;
-        kfree(rdev->gart.pages);
-        kfree(rdev->gart.pages_addr);
+        vfree(rdev->gart.pages);
+        vfree(rdev->gart.pages_addr);
         rdev->gart.pages = NULL;
         rdev->gart.pages_addr = NULL;
 
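
Note on the hunks above: switching kzalloc()/kfree() to vzalloc()/vfree() trades physically contiguous memory for vmalloc space, which suits tables that scale with the GART size and can reach hundreds of kilobytes. A minimal kernel-context sketch of the same allocate/free pairing, assuming a hypothetical example_pages table (vzalloc() and vfree() are the stock kernel APIs used in the hunk; this is not a standalone program):

    #include <linux/vmalloc.h>
    #include <linux/errno.h>

    /* hypothetical example table, sized like gart.pages in the hunk above */
    static void **example_pages;

    static int example_alloc(unsigned int num_cpu_pages)
    {
            /* large, only virtually contiguous allocation; zeroed like kzalloc() */
            example_pages = vzalloc(sizeof(void *) * num_cpu_pages);
            if (example_pages == NULL)
                    return -ENOMEM;
            return 0;
    }

    static void example_free(void)
    {
            vfree(example_pages);   /* vfree(NULL) is a no-op, like kfree(NULL) */
            example_pages = NULL;
    }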
@@ -423,6 +422,18 @@ void radeon_gart_fini(struct radeon_device *rdev)
  */
 
 /**
+ * radeon_vm_num_pde - return the number of page directory entries
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Calculate the number of page directory entries (cayman+).
+ */
+static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
+{
+        return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+}
+
+/**
  * radeon_vm_directory_size - returns the size of the page directory in bytes
  *
  * @rdev: radeon_device pointer
@@ -431,7 +442,7 @@ void radeon_gart_fini(struct radeon_device *rdev)
  */
 static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
 {
-        return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8;
+        return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
 }
 
 /**
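
The two helpers above reduce page-directory sizing to a shift and a multiply: max_pfn >> RADEON_VM_BLOCK_SIZE directory entries, 8 bytes each, rounded up to a GPU page. A small standalone sketch of that arithmetic, assuming illustrative values (a block size of 9 and 4 KiB GPU pages) rather than the real constants from radeon.h:

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative values only; the driver takes these from radeon.h */
    #define BLOCK_SIZE      9        /* stands in for RADEON_VM_BLOCK_SIZE */
    #define GPU_PAGE_SIZE   4096     /* stands in for RADEON_GPU_PAGE_SIZE */
    #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    int main(void)
    {
            uint64_t max_pfn = 1 << 20;   /* e.g. 1M GPU pages = 4 GiB of VA space */

            /* one PDE covers (1 << BLOCK_SIZE) page table entries */
            uint64_t num_pdes = max_pfn >> BLOCK_SIZE;

            /* 8 bytes per PDE, rounded up to a full GPU page */
            uint64_t dir_size = ALIGN_UP(num_pdes * 8, GPU_PAGE_SIZE);

            printf("%llu PDEs -> %llu byte page directory\n",
                   (unsigned long long)num_pdes, (unsigned long long)dir_size);
            return 0;
    }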
@@ -451,11 +462,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 
         if (!rdev->vm_manager.enabled) {
                 /* allocate enough for 2 full VM pts */
-                size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
-                size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8);
+                size = radeon_vm_directory_size(rdev);
+                size += rdev->vm_manager.max_pfn * 8;
                 size *= 2;
                 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-                                              size,
+                                              RADEON_GPU_PAGE_ALIGN(size),
                                               RADEON_GEM_DOMAIN_VRAM);
                 if (r) {
                         dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -476,7 +487,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 
         /* restore page table */
         list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-                if (vm->sa_bo == NULL)
+                if (vm->page_directory == NULL)
                         continue;
 
                 list_for_each_entry(bo_va, &vm->va, vm_list) {
@@ -500,16 +511,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev,
                                struct radeon_vm *vm)
 {
         struct radeon_bo_va *bo_va;
+        int i;
 
-        if (!vm->sa_bo)
+        if (!vm->page_directory)
                 return;
 
         list_del_init(&vm->list);
-        radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence);
+        radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
 
         list_for_each_entry(bo_va, &vm->va, vm_list) {
                 bo_va->valid = false;
         }
+
+        if (vm->page_tables == NULL)
+                return;
+
+        for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+                radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
+
+        kfree(vm->page_tables);
 }
 
 /**
@@ -546,63 +566,106 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
 }
 
 /**
+ * radeon_vm_evict - evict page table to make room for new one
+ *
+ * @rdev: radeon_device pointer
+ * @vm: VM we want to allocate something for
+ *
+ * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
+ * Returns 0 for success, -ENOMEM for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+        struct radeon_vm *vm_evict;
+
+        if (list_empty(&rdev->vm_manager.lru_vm))
+                return -ENOMEM;
+
+        vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
+                                    struct radeon_vm, list);
+        if (vm_evict == vm)
+                return -ENOMEM;
+
+        mutex_lock(&vm_evict->mutex);
+        radeon_vm_free_pt(rdev, vm_evict);
+        mutex_unlock(&vm_evict->mutex);
+        return 0;
+}
+
+/**
  * radeon_vm_alloc_pt - allocates a page table for a VM
  *
  * @rdev: radeon_device pointer
  * @vm: vm to bind
  *
  * Allocate a page table for the requested vm (cayman+).
- * Also starts to populate the page table.
  * Returns 0 for success, error for failure.
  *
  * Global and local mutex must be locked!
  */
 int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-        struct radeon_vm *vm_evict;
-        int r;
+        unsigned pd_size, pts_size;
         u64 *pd_addr;
-        int tables_size;
+        int r;
 
         if (vm == NULL) {
                 return -EINVAL;
         }
 
-        /* allocate enough to cover the current VM size */
-        tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
-        tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
-
-        if (vm->sa_bo != NULL) {
-                /* update lru */
-                list_del_init(&vm->list);
-                list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+        if (vm->page_directory != NULL) {
                 return 0;
         }
 
 retry:
-        r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
-                             tables_size, RADEON_GPU_PAGE_SIZE, false);
+        pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
+        r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+                             &vm->page_directory, pd_size,
+                             RADEON_GPU_PAGE_SIZE, false);
         if (r == -ENOMEM) {
-                if (list_empty(&rdev->vm_manager.lru_vm)) {
+                r = radeon_vm_evict(rdev, vm);
+                if (r)
                         return r;
-                }
-                vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
-                mutex_lock(&vm_evict->mutex);
-                radeon_vm_free_pt(rdev, vm_evict);
-                mutex_unlock(&vm_evict->mutex);
                 goto retry;
 
         } else if (r) {
                 return r;
         }
 
-        pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo);
-        vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
-        memset(pd_addr, 0, tables_size);
+        vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
+
+        /* Initially clear the page directory */
+        pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory);
+        memset(pd_addr, 0, pd_size);
+
+        pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
+        vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+
+        if (vm->page_tables == NULL) {
+                DRM_ERROR("Cannot allocate memory for page table array\n");
+                radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+                return -ENOMEM;
+        }
+
+        return 0;
+}
 
+/**
+ * radeon_vm_add_to_lru - add VMs page table to LRU list
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to add to LRU
+ *
+ * Add the allocated page table to the LRU list (cayman+).
+ *
+ * Global mutex must be locked!
+ */
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+        list_del_init(&vm->list);
         list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-        return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
-                                       &rdev->ring_tmp_bo.bo->tbo.mem);
 }
 
 /**
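
The allocation path introduced above follows a try/evict/retry pattern: sub-allocate the page directory, and on -ENOMEM free the least recently used VM's tables (never the requesting VM's) and try again. A stripped-down, runnable model of that control flow; the pool counters and helper names are made up and only stand in for radeon_sa_bo_new() and radeon_vm_evict():

    #include <errno.h>
    #include <stdio.h>

    static int pool_free_slots = 1;   /* pretend the SA manager has one slot left */
    static int evictable_vms = 2;     /* other VMs whose tables could be freed */

    static int try_alloc(void)
    {
            if (pool_free_slots == 0)
                    return -ENOMEM;
            pool_free_slots--;
            return 0;
    }

    static int evict_lru(void)
    {
            if (evictable_vms == 0)
                    return -ENOMEM;   /* nothing left to evict but ourselves */
            evictable_vms--;
            pool_free_slots++;
            return 0;
    }

    static int alloc_with_eviction(void)
    {
            int r;
    retry:
            r = try_alloc();
            if (r == -ENOMEM) {
                    r = evict_lru();
                    if (r)
                            return r;
                    goto retry;
            }
            return r;
    }

    int main(void)
    {
            printf("first alloc: %d\n", alloc_with_eviction());
            printf("second alloc (forces eviction): %d\n", alloc_with_eviction());
            return 0;
    }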
@@ -793,20 +856,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
         }
 
         mutex_lock(&vm->mutex);
-        if (last_pfn > vm->last_pfn) {
-                /* release mutex and lock in right order */
-                mutex_unlock(&vm->mutex);
-                mutex_lock(&rdev->vm_manager.lock);
-                mutex_lock(&vm->mutex);
-                /* and check again */
-                if (last_pfn > vm->last_pfn) {
-                        /* grow va space 32M by 32M */
-                        unsigned align = ((32 << 20) >> 12) - 1;
-                        radeon_vm_free_pt(rdev, vm);
-                        vm->last_pfn = (last_pfn + align) & ~align;
-                }
-                mutex_unlock(&rdev->vm_manager.lock);
-        }
         head = &vm->va;
         last_offset = 0;
         list_for_each_entry(tmp, &vm->va, vm_list) {
@@ -865,6 +914,154 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
 }
 
 /**
+ * radeon_vm_update_pdes - make sure that page directory is valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ *
+ * Allocates new page tables if necessary
+ * and updates the page directory (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+static int radeon_vm_update_pdes(struct radeon_device *rdev,
+                                 struct radeon_vm *vm,
+                                 uint64_t start, uint64_t end)
+{
+        static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
+
+        uint64_t last_pde = ~0, last_pt = ~0;
+        unsigned count = 0;
+        uint64_t pt_idx;
+        int r;
+
+        start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+        end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+
+        /* walk over the address space and update the page directory */
+        for (pt_idx = start; pt_idx <= end; ++pt_idx) {
+                uint64_t pde, pt;
+
+                if (vm->page_tables[pt_idx])
+                        continue;
+
+retry:
+                r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+                                     &vm->page_tables[pt_idx],
+                                     RADEON_VM_PTE_COUNT * 8,
+                                     RADEON_GPU_PAGE_SIZE, false);
+
+                if (r == -ENOMEM) {
+                        r = radeon_vm_evict(rdev, vm);
+                        if (r)
+                                return r;
+                        goto retry;
+                } else if (r) {
+                        return r;
+                }
+
+                pde = vm->pd_gpu_addr + pt_idx * 8;
+
+                pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+
+                if (((last_pde + 8 * count) != pde) ||
+                    ((last_pt + incr * count) != pt)) {
+
+                        if (count) {
+                                radeon_asic_vm_set_page(rdev, last_pde,
+                                                        last_pt, count, incr,
+                                                        RADEON_VM_PAGE_VALID);
+                        }
+
+                        count = 1;
+                        last_pde = pde;
+                        last_pt = pt;
+                } else {
+                        ++count;
+                }
+        }
+
+        if (count) {
+                radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
+                                        incr, RADEON_VM_PAGE_VALID);
+
+        }
+
+        return 0;
+}
+
+/**
+ * radeon_vm_update_ptes - make sure that page tables are valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_update_ptes(struct radeon_device *rdev,
+                                  struct radeon_vm *vm,
+                                  uint64_t start, uint64_t end,
+                                  uint64_t dst, uint32_t flags)
+{
+        static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
+
+        uint64_t last_pte = ~0, last_dst = ~0;
+        unsigned count = 0;
+        uint64_t addr;
+
+        start = start / RADEON_GPU_PAGE_SIZE;
+        end = end / RADEON_GPU_PAGE_SIZE;
+
+        /* walk over the address space and update the page tables */
+        for (addr = start; addr < end; ) {
+                uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
+                unsigned nptes;
+                uint64_t pte;
+
+                if ((addr & ~mask) == (end & ~mask))
+                        nptes = end - addr;
+                else
+                        nptes = RADEON_VM_PTE_COUNT - (addr & mask);
+
+                pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+                pte += (addr & mask) * 8;
+
+                if ((last_pte + 8 * count) != pte) {
+
+                        if (count) {
+                                radeon_asic_vm_set_page(rdev, last_pte,
+                                                        last_dst, count,
+                                                        RADEON_GPU_PAGE_SIZE,
+                                                        flags);
+                        }
+
+                        count = nptes;
+                        last_pte = pte;
+                        last_dst = dst;
+                } else {
+                        count += nptes;
+                }
+
+                addr += nptes;
+                dst += nptes * RADEON_GPU_PAGE_SIZE;
+        }
+
+        if (count) {
+                radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
+                                        RADEON_GPU_PAGE_SIZE, flags);
+        }
+}
+
+/**
  * radeon_vm_bo_update_pte - map a bo into the vm page table
  *
  * @rdev: radeon_device pointer
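
Both walkers added above split a GPU address the same way: the page number's upper bits select the page table (and thus the PDE slot), the lower RADEON_VM_BLOCK_SIZE bits select the entry inside it, and every entry is 8 bytes. A standalone sketch of that split, again with illustrative constants instead of the radeon.h values:

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative constants; the driver takes these from radeon.h */
    #define BLOCK_SIZE    9
    #define PTE_COUNT     (1u << BLOCK_SIZE)
    #define GPU_PAGE_SIZE 4096u

    int main(void)
    {
            uint64_t va = 0x12345000;                 /* some GPU virtual address */
            uint64_t pfn = va / GPU_PAGE_SIZE;        /* GPU page number */

            uint64_t pt_idx  = pfn >> BLOCK_SIZE;     /* which page table (PDE slot) */
            uint64_t pte_idx = pfn & (PTE_COUNT - 1); /* entry within that table */

            /* byte offsets of the PDE and PTE, 8 bytes per entry as in the diff */
            uint64_t pde_off = pt_idx * 8;
            uint64_t pte_off = pte_idx * 8;

            printf("va 0x%llx -> pt_idx %llu (pde at +%llu), pte_idx %llu (pte at +%llu)\n",
                   (unsigned long long)va, (unsigned long long)pt_idx,
                   (unsigned long long)pde_off, (unsigned long long)pte_idx,
                   (unsigned long long)pte_off);
            return 0;
    }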
@@ -887,12 +1084,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
         struct radeon_semaphore *sem = NULL;
         struct radeon_bo_va *bo_va;
         unsigned nptes, npdes, ndw;
-        uint64_t pe, addr;
-        uint64_t pfn;
+        uint64_t addr;
         int r;
 
         /* nothing to do if vm isn't bound */
-        if (vm->sa_bo == NULL)
+        if (vm->page_directory == NULL)
                 return 0;
 
         bo_va = radeon_vm_bo_find(vm, bo);
@@ -939,25 +1135,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                 }
         }
 
-        /* estimate number of dw needed */
-        /* reserve space for 32-bit padding */
-        ndw = 32;
-
         nptes = radeon_bo_ngpu_pages(bo);
 
-        pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE);
+        /* assume two extra pdes in case the mapping overlaps the borders */
+        npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
 
-        /* handle cases where a bo spans several pdes */
-        npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) -
-                 (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE;
+        /* estimate number of dw needed */
+        /* semaphore, fence and padding */
+        ndw = 32;
+
+        if (RADEON_VM_BLOCK_SIZE > 11)
+                /* reserve space for one header for every 2k dwords */
+                ndw += (nptes >> 11) * 4;
+        else
+                /* reserve space for one header for
+                    every (1 << BLOCK_SIZE) entries */
+                ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
 
-        /* reserve space for one header for every 2k dwords */
-        ndw += (nptes >> 11) * 3;
         /* reserve space for pte addresses */
         ndw += nptes * 2;
 
         /* reserve space for one header for every 2k dwords */
-        ndw += (npdes >> 11) * 3;
+        ndw += (npdes >> 11) * 4;
+
         /* reserve space for pde addresses */
         ndw += npdes * 2;
 
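
The reworked estimate above is plain arithmetic on nptes and npdes. A hedged worked example for a hypothetical 1 MiB buffer (256 pages of 4 KiB) with an assumed block size of 9; the inputs are illustrative, not taken from the driver:

    #include <stdio.h>

    int main(void)
    {
            /* illustrative inputs: 1 MiB bo = 256 pages of 4 KiB, block size 9 */
            unsigned nptes = 256;
            unsigned block_size = 9;

            /* two extra PDEs in case the mapping straddles table borders */
            unsigned npdes = (nptes >> block_size) + 2;     /* 0 + 2 = 2 */

            unsigned ndw = 32;                              /* semaphore, fence, padding */
            if (block_size > 11)
                    ndw += (nptes >> 11) * 4;
            else
                    ndw += (nptes >> block_size) * 4;       /* 0 headers for 256 pages */
            ndw += nptes * 2;                               /* 512 dw of PTE addresses */
            ndw += (npdes >> 11) * 4;                       /* 0 */
            ndw += npdes * 2;                               /* 4 dw of PDE addresses */

            printf("nptes=%u npdes=%u -> ndw=%u\n", nptes, npdes, ndw);  /* ndw = 548 */
            return 0;
    }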
@@ -971,22 +1171,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                 radeon_fence_note_sync(vm->fence, ridx);
         }
 
-        /* update page table entries */
-        pe = vm->pd_gpu_addr;
-        pe += radeon_vm_directory_size(rdev);
-        pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8;
-
-        radeon_asic_vm_set_page(rdev, pe, addr, nptes,
-                                RADEON_GPU_PAGE_SIZE, bo_va->flags);
-
-        /* update page directory entries */
-        addr = pe;
-
-        pe = vm->pd_gpu_addr;
-        pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
+        r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
+        if (r) {
+                radeon_ring_unlock_undo(rdev, ring);
+                return r;
+        }
 
-        radeon_asic_vm_set_page(rdev, pe, addr, npdes,
-                                RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID);
+        radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
+                              addr, bo_va->flags);
 
         radeon_fence_unref(&vm->fence);
         r = radeon_fence_emit(rdev, &vm->fence, ridx);
@@ -997,6 +1189,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
         radeon_ring_unlock_commit(rdev, ring);
         radeon_semaphore_free(rdev, &sem, vm->fence);
         radeon_fence_unref(&vm->last_flush);
+
         return 0;
 }
 
@@ -1056,31 +1249,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
  * @rdev: radeon_device pointer
  * @vm: requested vm
  *
- * Init @vm (cayman+).
- * Map the IB pool and any other shared objects into the VM
- * by default as it's used by all VMs.
- * Returns 0 for success, error for failure.
+ * Init @vm fields (cayman+).
  */
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-        struct radeon_bo_va *bo_va;
-        int r;
-
         vm->id = 0;
         vm->fence = NULL;
-        vm->last_pfn = 0;
         mutex_init(&vm->mutex);
         INIT_LIST_HEAD(&vm->list);
         INIT_LIST_HEAD(&vm->va);
-
-        /* map the ib pool buffer at 0 in virtual address space, set
-         * read only
-         */
-        bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo);
-        r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
-                                  RADEON_VM_PAGE_READABLE |
-                                  RADEON_VM_PAGE_SNOOPED);
-        return r;
 }
 
 /**
@@ -1102,17 +1279,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
         radeon_vm_free_pt(rdev, vm);
         mutex_unlock(&rdev->vm_manager.lock);
 
-        /* remove all bo at this point non are busy any more because unbind
-         * waited for the last vm fence to signal
-         */
-        r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
-        if (!r) {
-                bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo);
-                list_del_init(&bo_va->bo_list);
-                list_del_init(&bo_va->vm_list);
-                radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
-                kfree(bo_va);
-        }
         if (!list_empty(&vm->va)) {
                 dev_err(rdev->dev, "still active bo inside vm\n");
         }
