author     Linus Torvalds <torvalds@linux-foundation.org>  2012-10-04 02:29:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-04 02:29:23 -0400
commit     612a9aab56a93533e76e3ad91642db7033e03b69 (patch)
tree       8402096973f67af941f9392f7da06cca03e0b58a /drivers/gpu/drm/radeon/radeon_gart.c
parent     3a494318b14b1bc0f59d2d6ce84c505c74d82d2a (diff)
parent     268d28371cd326be4dfcd7eba5917bf4b9d30c8f (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm merge (part 1) from Dave Airlie:
 "So first of all my tree and uapi stuff has a conflict mess, its my
  fault as the nouveau stuff didn't hit -next as were trying to rebase
  regressions out of it before we merged.

  Highlights:

   - SH mobile modesetting driver and associated helpers

   - some DRM core documentation

   - i915 modesetting rework, haswell hdmi, haswell and vlv fixes, write
     combined pte writing, ilk rc6 support,

   - nouveau: major driver rework into a hw core driver, makes features
     like SLI a lot saner to implement,

   - psb: add eDP/DP support for Cedarview

   - radeon: 2 layer page tables, async VM pte updates, better PLL
     selection for > 2 screens, better ACPI interactions

  The rest is general grab bag of fixes.

  So why part 1? well I have the exynos pull req which came in a bit
  late but was waiting for me to do something they shouldn't have and it
  looks fairly safe, and David Howells has some more header cleanups
  he'd like me to pull, that seem like a good idea, but I'd like to get
  this merge out of the way so -next dosen't get blocked."

Tons of conflicts mostly due to silly include line changes, but mostly
mindless.  A few other small semantic conflicts too, noted from Dave's
pre-merged branch.

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (447 commits)
  drm/nv98/crypt: fix fuc build with latest envyas
  drm/nouveau/devinit: fixup various issues with subdev ctor/init ordering
  drm/nv41/vm: fix and enable use of "real" pciegart
  drm/nv44/vm: fix and enable use of "real" pciegart
  drm/nv04/dmaobj: fixup vm target handling in preparation for nv4x pcie
  drm/nouveau: store supported dma mask in vmmgr
  drm/nvc0/ibus: initial implementation of subdev
  drm/nouveau/therm: add support for fan-control modes
  drm/nouveau/hwmon: rename pwm0* to pmw1* to follow hwmon's rules
  drm/nouveau/therm: calculate the pwm divisor on nv50+
  drm/nouveau/fan: rewrite the fan tachometer driver to get more precision, faster
  drm/nouveau/therm: move thermal-related functions to the therm subdev
  drm/nouveau/bios: parse the pwm divisor from the perf table
  drm/nouveau/therm: use the EXTDEV table to detect i2c monitoring devices
  drm/nouveau/therm: rework thermal table parsing
  drm/nouveau/gpio: expose the PWM/TOGGLE parameter found in the gpio vbios table
  drm/nouveau: fix pm initialization order
  drm/nouveau/bios: check that fixed tvdac gpio data is valid before using it
  drm/nouveau: log channel debug/error messages from client object rather than drm client
  drm/nouveau: have drm debugging macros build on top of core macros
  ...
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_gart.c')
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c  602
1 file changed, 368 insertions(+), 234 deletions(-)
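
Before the diff itself, a brief orientation: the radeon portion of this merge ("2 layer page tables" in the highlights above) moves from a single fixed-size per-VM page table to a page directory plus page tables allocated on demand, and radeon_vm_manager_init() now sizes its suballocator from that layout. The standalone sketch below only mirrors the sizing arithmetic as it appears in the hunks that follow (radeon_vm_directory_size() and the "allocate enough for 2 full VM pts" computation); RADEON_VM_BLOCK_SIZE, RADEON_GPU_PAGE_SIZE and the example max_pfn are assumed illustration values, not taken from any particular kernel configuration.

    /* Standalone sketch of the two-level VM page-table sizing added by this
     * patch.  The constants below are illustrative assumptions only. */
    #include <stdint.h>
    #include <stdio.h>

    #define RADEON_GPU_PAGE_SIZE 4096   /* assumed GPU page size            */
    #define RADEON_VM_BLOCK_SIZE 9      /* assumed: 2^9 PTEs per PD entry   */

    /* align a size up to a whole GPU page, like RADEON_GPU_PAGE_ALIGN() */
    static uint64_t gpu_page_align(uint64_t size)
    {
            return (size + RADEON_GPU_PAGE_SIZE - 1) &
                   ~(uint64_t)(RADEON_GPU_PAGE_SIZE - 1);
    }

    /* page directory size in bytes: one 8-byte entry per block of PTEs */
    static uint64_t vm_directory_size(uint64_t max_pfn)
    {
            return (max_pfn >> RADEON_VM_BLOCK_SIZE) * 8;
    }

    int main(void)
    {
            uint64_t max_pfn = 1 << 20;     /* assume a 4 GiB VM space */
            uint64_t size;

            /* "allocate enough for 2 full VM pts", as in radeon_vm_manager_init() */
            size  = gpu_page_align(vm_directory_size(max_pfn));
            size += gpu_page_align(max_pfn * 8);    /* 8 bytes per PTE */
            size *= 2;

            printf("sa_manager size for max_pfn=%llu: %llu KiB\n",
                   (unsigned long long)max_pfn,
                   (unsigned long long)(size >> 10));
            return 0;
    }

Compiled with any C compiler, the sketch prints the suballocator size this scheme would request for the assumed 4 GiB address space; the driver performs the same computation with its own constants inside radeon_vm_manager_init() in the diff below.
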
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 33cc03e310fd..f0c06d196b75 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -423,6 +423,18 @@ void radeon_gart_fini(struct radeon_device *rdev)
423 */ 423 */
424 424
425/** 425/**
426 * radeon_vm_directory_size - returns the size of the page directory in bytes
427 *
428 * @rdev: radeon_device pointer
429 *
430 * Calculate the size of the page directory in bytes (cayman+).
431 */
432static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
433{
434 return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8;
435}
436
437/**
426 * radeon_vm_manager_init - init the vm manager 438 * radeon_vm_manager_init - init the vm manager
427 * 439 *
428 * @rdev: radeon_device pointer 440 * @rdev: radeon_device pointer
@@ -435,12 +447,15 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
435 struct radeon_vm *vm; 447 struct radeon_vm *vm;
436 struct radeon_bo_va *bo_va; 448 struct radeon_bo_va *bo_va;
437 int r; 449 int r;
450 unsigned size;
438 451
439 if (!rdev->vm_manager.enabled) { 452 if (!rdev->vm_manager.enabled) {
440 /* mark first vm as always in use, it's the system one */
441 /* allocate enough for 2 full VM pts */ 453 /* allocate enough for 2 full VM pts */
454 size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
455 size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8);
456 size *= 2;
442 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, 457 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
443 rdev->vm_manager.max_pfn * 8 * 2, 458 size,
444 RADEON_GEM_DOMAIN_VRAM); 459 RADEON_GEM_DOMAIN_VRAM);
445 if (r) { 460 if (r) {
446 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", 461 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -448,10 +463,10 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
448 return r; 463 return r;
449 } 464 }
450 465
451 r = rdev->vm_manager.funcs->init(rdev); 466 r = radeon_asic_vm_init(rdev);
452 if (r) 467 if (r)
453 return r; 468 return r;
454 469
455 rdev->vm_manager.enabled = true; 470 rdev->vm_manager.enabled = true;
456 471
457 r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); 472 r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
@@ -461,73 +476,36 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
461 476
462 /* restore page table */ 477 /* restore page table */
463 list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { 478 list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
464 if (vm->id == -1) 479 if (vm->sa_bo == NULL)
465 continue; 480 continue;
466 481
467 list_for_each_entry(bo_va, &vm->va, vm_list) { 482 list_for_each_entry(bo_va, &vm->va, vm_list) {
468 struct ttm_mem_reg *mem = NULL;
469 if (bo_va->valid)
470 mem = &bo_va->bo->tbo.mem;
471
472 bo_va->valid = false; 483 bo_va->valid = false;
473 r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem);
474 if (r) {
475 DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
476 }
477 }
478
479 r = rdev->vm_manager.funcs->bind(rdev, vm, vm->id);
480 if (r) {
481 DRM_ERROR("Failed to bind vm %d!\n", vm->id);
482 } 484 }
483 } 485 }
484 return 0; 486 return 0;
485} 487}
486 488
487/* global mutex must be lock */
488/** 489/**
489 * radeon_vm_unbind_locked - unbind a specific vm 490 * radeon_vm_free_pt - free the page table for a specific vm
490 * 491 *
491 * @rdev: radeon_device pointer 492 * @rdev: radeon_device pointer
492 * @vm: vm to unbind 493 * @vm: vm to unbind
493 * 494 *
494 * Unbind the requested vm (cayman+). 495 * Free the page table of a specific vm (cayman+).
495 * Wait for use of the VM to finish, then unbind the page table, 496 *
496 * and free the page table memory. 497 * Global and local mutex must be lock!
497 */ 498 */
498static void radeon_vm_unbind_locked(struct radeon_device *rdev, 499static void radeon_vm_free_pt(struct radeon_device *rdev,
499 struct radeon_vm *vm) 500 struct radeon_vm *vm)
500{ 501{
501 struct radeon_bo_va *bo_va; 502 struct radeon_bo_va *bo_va;
502 503
503 if (vm->id == -1) { 504 if (!vm->sa_bo)
504 return; 505 return;
505 }
506 506
507 /* wait for vm use to end */
508 while (vm->fence) {
509 int r;
510 r = radeon_fence_wait(vm->fence, false);
511 if (r)
512 DRM_ERROR("error while waiting for fence: %d\n", r);
513 if (r == -EDEADLK) {
514 mutex_unlock(&rdev->vm_manager.lock);
515 r = radeon_gpu_reset(rdev);
516 mutex_lock(&rdev->vm_manager.lock);
517 if (!r)
518 continue;
519 }
520 break;
521 }
522 radeon_fence_unref(&vm->fence);
523
524 /* hw unbind */
525 rdev->vm_manager.funcs->unbind(rdev, vm);
526 rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
527 list_del_init(&vm->list); 507 list_del_init(&vm->list);
528 vm->id = -1; 508 radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence);
529 radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
530 vm->pt = NULL;
531 509
532 list_for_each_entry(bo_va, &vm->va, vm_list) { 510 list_for_each_entry(bo_va, &vm->va, vm_list) {
533 bo_va->valid = false; 511 bo_va->valid = false;
@@ -544,16 +522,22 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
544void radeon_vm_manager_fini(struct radeon_device *rdev) 522void radeon_vm_manager_fini(struct radeon_device *rdev)
545{ 523{
546 struct radeon_vm *vm, *tmp; 524 struct radeon_vm *vm, *tmp;
525 int i;
547 526
548 if (!rdev->vm_manager.enabled) 527 if (!rdev->vm_manager.enabled)
549 return; 528 return;
550 529
551 mutex_lock(&rdev->vm_manager.lock); 530 mutex_lock(&rdev->vm_manager.lock);
552 /* unbind all active vm */ 531 /* free all allocated page tables */
553 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { 532 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
554 radeon_vm_unbind_locked(rdev, vm); 533 mutex_lock(&vm->mutex);
534 radeon_vm_free_pt(rdev, vm);
535 mutex_unlock(&vm->mutex);
555 } 536 }
556 rdev->vm_manager.funcs->fini(rdev); 537 for (i = 0; i < RADEON_NUM_VM; ++i) {
538 radeon_fence_unref(&rdev->vm_manager.active[i]);
539 }
540 radeon_asic_vm_fini(rdev);
557 mutex_unlock(&rdev->vm_manager.lock); 541 mutex_unlock(&rdev->vm_manager.lock);
558 542
559 radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); 543 radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
@@ -561,46 +545,34 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
561 rdev->vm_manager.enabled = false; 545 rdev->vm_manager.enabled = false;
562} 546}
563 547
564/* global mutex must be locked */
565/** 548/**
566 * radeon_vm_unbind - locked version of unbind 549 * radeon_vm_alloc_pt - allocates a page table for a VM
567 *
568 * @rdev: radeon_device pointer
569 * @vm: vm to unbind
570 *
571 * Locked version that wraps radeon_vm_unbind_locked (cayman+).
572 */
573void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
574{
575 mutex_lock(&vm->mutex);
576 radeon_vm_unbind_locked(rdev, vm);
577 mutex_unlock(&vm->mutex);
578}
579
580/* global and local mutex must be locked */
581/**
582 * radeon_vm_bind - bind a page table to a VMID
583 * 550 *
584 * @rdev: radeon_device pointer 551 * @rdev: radeon_device pointer
585 * @vm: vm to bind 552 * @vm: vm to bind
586 * 553 *
587 * Bind the requested vm (cayman+). 554 * Allocate a page table for the requested vm (cayman+).
588 * Suballocate memory for the page table, allocate a VMID 555 * Also starts to populate the page table.
589 * and bind the page table to it, and finally start to populate
590 * the page table.
591 * Returns 0 for success, error for failure. 556 * Returns 0 for success, error for failure.
557 *
558 * Global and local mutex must be locked!
592 */ 559 */
593int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) 560int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
594{ 561{
595 struct radeon_vm *vm_evict; 562 struct radeon_vm *vm_evict;
596 unsigned i; 563 int r;
597 int id = -1, r; 564 u64 *pd_addr;
565 int tables_size;
598 566
599 if (vm == NULL) { 567 if (vm == NULL) {
600 return -EINVAL; 568 return -EINVAL;
601 } 569 }
602 570
603 if (vm->id != -1) { 571 /* allocate enough to cover the current VM size */
572 tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
573 tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
574
575 if (vm->sa_bo != NULL) {
604 /* update lru */ 576 /* update lru */
605 list_del_init(&vm->list); 577 list_del_init(&vm->list);
606 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); 578 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
@@ -609,98 +581,215 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
609 581
610retry: 582retry:
611 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, 583 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
612 RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8), 584 tables_size, RADEON_GPU_PAGE_SIZE, false);
613 RADEON_GPU_PAGE_SIZE, false); 585 if (r == -ENOMEM) {
614 if (r) {
615 if (list_empty(&rdev->vm_manager.lru_vm)) { 586 if (list_empty(&rdev->vm_manager.lru_vm)) {
616 return r; 587 return r;
617 } 588 }
618 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); 589 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
619 radeon_vm_unbind(rdev, vm_evict); 590 mutex_lock(&vm_evict->mutex);
591 radeon_vm_free_pt(rdev, vm_evict);
592 mutex_unlock(&vm_evict->mutex);
620 goto retry; 593 goto retry;
594
595 } else if (r) {
596 return r;
621 } 597 }
622 vm->pt = radeon_sa_bo_cpu_addr(vm->sa_bo);
623 vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
624 memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
625 598
626retry_id: 599 pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo);
627 /* search for free vm */ 600 vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
628 for (i = 0; i < rdev->vm_manager.nvm; i++) { 601 memset(pd_addr, 0, tables_size);
629 if (!(rdev->vm_manager.use_bitmap & (1 << i))) { 602
630 id = i; 603 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
631 break; 604 return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
605 &rdev->ring_tmp_bo.bo->tbo.mem);
606}
607
608/**
609 * radeon_vm_grab_id - allocate the next free VMID
610 *
611 * @rdev: radeon_device pointer
612 * @vm: vm to allocate id for
613 * @ring: ring we want to submit job to
614 *
615 * Allocate an id for the vm (cayman+).
616 * Returns the fence we need to sync to (if any).
617 *
618 * Global and local mutex must be locked!
619 */
620struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
621 struct radeon_vm *vm, int ring)
622{
623 struct radeon_fence *best[RADEON_NUM_RINGS] = {};
624 unsigned choices[2] = {};
625 unsigned i;
626
627 /* check if the id is still valid */
628 if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
629 return NULL;
630
631 /* we definately need to flush */
632 radeon_fence_unref(&vm->last_flush);
633
634 /* skip over VMID 0, since it is the system VM */
635 for (i = 1; i < rdev->vm_manager.nvm; ++i) {
636 struct radeon_fence *fence = rdev->vm_manager.active[i];
637
638 if (fence == NULL) {
639 /* found a free one */
640 vm->id = i;
641 return NULL;
642 }
643
644 if (radeon_fence_is_earlier(fence, best[fence->ring])) {
645 best[fence->ring] = fence;
646 choices[fence->ring == ring ? 0 : 1] = i;
632 } 647 }
633 } 648 }
634 /* evict vm if necessary */ 649
635 if (id == -1) { 650 for (i = 0; i < 2; ++i) {
636 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); 651 if (choices[i]) {
637 radeon_vm_unbind(rdev, vm_evict); 652 vm->id = choices[i];
638 goto retry_id; 653 return rdev->vm_manager.active[choices[i]];
654 }
639 } 655 }
640 656
641 /* do hw bind */ 657 /* should never happen */
642 r = rdev->vm_manager.funcs->bind(rdev, vm, id); 658 BUG();
643 if (r) { 659 return NULL;
644 radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); 660}
645 return r; 661
662/**
663 * radeon_vm_fence - remember fence for vm
664 *
665 * @rdev: radeon_device pointer
666 * @vm: vm we want to fence
667 * @fence: fence to remember
668 *
669 * Fence the vm (cayman+).
670 * Set the fence used to protect page table and id.
671 *
672 * Global and local mutex must be locked!
673 */
674void radeon_vm_fence(struct radeon_device *rdev,
675 struct radeon_vm *vm,
676 struct radeon_fence *fence)
677{
678 radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
679 rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
680
681 radeon_fence_unref(&vm->fence);
682 vm->fence = radeon_fence_ref(fence);
683}
684
685/**
686 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
687 *
688 * @vm: requested vm
689 * @bo: requested buffer object
690 *
691 * Find @bo inside the requested vm (cayman+).
692 * Search inside the @bos vm list for the requested vm
693 * Returns the found bo_va or NULL if none is found
694 *
695 * Object has to be reserved!
696 */
697struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
698 struct radeon_bo *bo)
699{
700 struct radeon_bo_va *bo_va;
701
702 list_for_each_entry(bo_va, &bo->va, bo_list) {
703 if (bo_va->vm == vm) {
704 return bo_va;
705 }
646 } 706 }
647 rdev->vm_manager.use_bitmap |= 1 << id; 707 return NULL;
648 vm->id = id;
649 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
650 return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
651 &rdev->ring_tmp_bo.bo->tbo.mem);
652} 708}
653 709
654/* object have to be reserved */
655/** 710/**
656 * radeon_vm_bo_add - add a bo to a specific vm 711 * radeon_vm_bo_add - add a bo to a specific vm
657 * 712 *
658 * @rdev: radeon_device pointer 713 * @rdev: radeon_device pointer
659 * @vm: requested vm 714 * @vm: requested vm
660 * @bo: radeon buffer object 715 * @bo: radeon buffer object
661 * @offset: requested offset of the buffer in the VM address space
662 * @flags: attributes of pages (read/write/valid/etc.)
663 * 716 *
664 * Add @bo into the requested vm (cayman+). 717 * Add @bo into the requested vm (cayman+).
665 * Add @bo to the list of bos associated with the vm and validate 718 * Add @bo to the list of bos associated with the vm
666 * the offset requested within the vm address space. 719 * Returns newly added bo_va or NULL for failure
667 * Returns 0 for success, error for failure. 720 *
721 * Object has to be reserved!
668 */ 722 */
669int radeon_vm_bo_add(struct radeon_device *rdev, 723struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
670 struct radeon_vm *vm, 724 struct radeon_vm *vm,
671 struct radeon_bo *bo, 725 struct radeon_bo *bo)
672 uint64_t offset,
673 uint32_t flags)
674{ 726{
675 struct radeon_bo_va *bo_va, *tmp; 727 struct radeon_bo_va *bo_va;
676 struct list_head *head;
677 uint64_t size = radeon_bo_size(bo), last_offset = 0;
678 unsigned last_pfn;
679 728
680 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 729 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
681 if (bo_va == NULL) { 730 if (bo_va == NULL) {
682 return -ENOMEM; 731 return NULL;
683 } 732 }
684 bo_va->vm = vm; 733 bo_va->vm = vm;
685 bo_va->bo = bo; 734 bo_va->bo = bo;
686 bo_va->soffset = offset; 735 bo_va->soffset = 0;
687 bo_va->eoffset = offset + size; 736 bo_va->eoffset = 0;
688 bo_va->flags = flags; 737 bo_va->flags = 0;
689 bo_va->valid = false; 738 bo_va->valid = false;
739 bo_va->ref_count = 1;
690 INIT_LIST_HEAD(&bo_va->bo_list); 740 INIT_LIST_HEAD(&bo_va->bo_list);
691 INIT_LIST_HEAD(&bo_va->vm_list); 741 INIT_LIST_HEAD(&bo_va->vm_list);
692 /* make sure object fit at this offset */
693 if (bo_va->soffset >= bo_va->eoffset) {
694 kfree(bo_va);
695 return -EINVAL;
696 }
697 742
698 last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE; 743 mutex_lock(&vm->mutex);
699 if (last_pfn > rdev->vm_manager.max_pfn) { 744 list_add(&bo_va->vm_list, &vm->va);
700 kfree(bo_va); 745 list_add_tail(&bo_va->bo_list, &bo->va);
701 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", 746 mutex_unlock(&vm->mutex);
702 last_pfn, rdev->vm_manager.max_pfn); 747
703 return -EINVAL; 748 return bo_va;
749}
750
751/**
752 * radeon_vm_bo_set_addr - set bos virtual address inside a vm
753 *
754 * @rdev: radeon_device pointer
755 * @bo_va: bo_va to store the address
756 * @soffset: requested offset of the buffer in the VM address space
757 * @flags: attributes of pages (read/write/valid/etc.)
758 *
759 * Set offset of @bo_va (cayman+).
760 * Validate and set the offset requested within the vm address space.
761 * Returns 0 for success, error for failure.
762 *
763 * Object has to be reserved!
764 */
765int radeon_vm_bo_set_addr(struct radeon_device *rdev,
766 struct radeon_bo_va *bo_va,
767 uint64_t soffset,
768 uint32_t flags)
769{
770 uint64_t size = radeon_bo_size(bo_va->bo);
771 uint64_t eoffset, last_offset = 0;
772 struct radeon_vm *vm = bo_va->vm;
773 struct radeon_bo_va *tmp;
774 struct list_head *head;
775 unsigned last_pfn;
776
777 if (soffset) {
778 /* make sure object fit at this offset */
779 eoffset = soffset + size;
780 if (soffset >= eoffset) {
781 return -EINVAL;
782 }
783
784 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
785 if (last_pfn > rdev->vm_manager.max_pfn) {
786 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
787 last_pfn, rdev->vm_manager.max_pfn);
788 return -EINVAL;
789 }
790
791 } else {
792 eoffset = last_pfn = 0;
704 } 793 }
705 794
706 mutex_lock(&vm->mutex); 795 mutex_lock(&vm->mutex);
@@ -713,7 +802,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
713 if (last_pfn > vm->last_pfn) { 802 if (last_pfn > vm->last_pfn) {
714 /* grow va space 32M by 32M */ 803 /* grow va space 32M by 32M */
715 unsigned align = ((32 << 20) >> 12) - 1; 804 unsigned align = ((32 << 20) >> 12) - 1;
716 radeon_vm_unbind_locked(rdev, vm); 805 radeon_vm_free_pt(rdev, vm);
717 vm->last_pfn = (last_pfn + align) & ~align; 806 vm->last_pfn = (last_pfn + align) & ~align;
718 } 807 }
719 mutex_unlock(&rdev->vm_manager.lock); 808 mutex_unlock(&rdev->vm_manager.lock);
@@ -721,68 +810,60 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
721 head = &vm->va; 810 head = &vm->va;
722 last_offset = 0; 811 last_offset = 0;
723 list_for_each_entry(tmp, &vm->va, vm_list) { 812 list_for_each_entry(tmp, &vm->va, vm_list) {
724 if (bo_va->soffset >= last_offset && bo_va->eoffset < tmp->soffset) { 813 if (bo_va == tmp) {
814 /* skip over currently modified bo */
815 continue;
816 }
817
818 if (soffset >= last_offset && eoffset <= tmp->soffset) {
725 /* bo can be added before this one */ 819 /* bo can be added before this one */
726 break; 820 break;
727 } 821 }
728 if (bo_va->soffset >= tmp->soffset && bo_va->soffset < tmp->eoffset) { 822 if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
729 /* bo and tmp overlap, invalid offset */ 823 /* bo and tmp overlap, invalid offset */
730 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", 824 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
731 bo, (unsigned)bo_va->soffset, tmp->bo, 825 bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
732 (unsigned)tmp->soffset, (unsigned)tmp->eoffset); 826 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
733 kfree(bo_va);
734 mutex_unlock(&vm->mutex); 827 mutex_unlock(&vm->mutex);
735 return -EINVAL; 828 return -EINVAL;
736 } 829 }
737 last_offset = tmp->eoffset; 830 last_offset = tmp->eoffset;
738 head = &tmp->vm_list; 831 head = &tmp->vm_list;
739 } 832 }
740 list_add(&bo_va->vm_list, head); 833
741 list_add_tail(&bo_va->bo_list, &bo->va); 834 bo_va->soffset = soffset;
835 bo_va->eoffset = eoffset;
836 bo_va->flags = flags;
837 bo_va->valid = false;
838 list_move(&bo_va->vm_list, head);
839
742 mutex_unlock(&vm->mutex); 840 mutex_unlock(&vm->mutex);
743 return 0; 841 return 0;
744} 842}
745 843
746/** 844/**
747 * radeon_vm_get_addr - get the physical address of the page 845 * radeon_vm_map_gart - get the physical address of a gart page
748 * 846 *
749 * @rdev: radeon_device pointer 847 * @rdev: radeon_device pointer
750 * @mem: ttm mem 848 * @addr: the unmapped addr
751 * @pfn: pfn
752 * 849 *
753 * Look up the physical address of the page that the pte resolves 850 * Look up the physical address of the page that the pte resolves
754 * to (cayman+). 851 * to (cayman+).
755 * Returns the physical address of the page. 852 * Returns the physical address of the page.
756 */ 853 */
757static u64 radeon_vm_get_addr(struct radeon_device *rdev, 854uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
758 struct ttm_mem_reg *mem,
759 unsigned pfn)
760{ 855{
761 u64 addr = 0; 856 uint64_t result;
762 857
763 switch (mem->mem_type) { 858 /* page table offset */
764 case TTM_PL_VRAM: 859 result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
765 addr = (mem->start << PAGE_SHIFT); 860
766 addr += pfn * RADEON_GPU_PAGE_SIZE; 861 /* in case cpu page size != gpu page size*/
767 addr += rdev->vm_manager.vram_base_offset; 862 result |= addr & (~PAGE_MASK);
768 break; 863
769 case TTM_PL_TT: 864 return result;
770 /* offset inside page table */
771 addr = mem->start << PAGE_SHIFT;
772 addr += pfn * RADEON_GPU_PAGE_SIZE;
773 addr = addr >> PAGE_SHIFT;
774 /* page table offset */
775 addr = rdev->gart.pages_addr[addr];
776 /* in case cpu page size != gpu page size*/
777 addr += (pfn * RADEON_GPU_PAGE_SIZE) & (~PAGE_MASK);
778 break;
779 default:
780 break;
781 }
782 return addr;
783} 865}
784 866
785/* object have to be reserved & global and local mutex must be locked */
786/** 867/**
787 * radeon_vm_bo_update_pte - map a bo into the vm page table 868 * radeon_vm_bo_update_pte - map a bo into the vm page table
788 * 869 *
@@ -793,103 +874,160 @@ static u64 radeon_vm_get_addr(struct radeon_device *rdev,
793 * 874 *
794 * Fill in the page table entries for @bo (cayman+). 875 * Fill in the page table entries for @bo (cayman+).
795 * Returns 0 for success, -EINVAL for failure. 876 * Returns 0 for success, -EINVAL for failure.
877 *
878 * Object have to be reserved & global and local mutex must be locked!
796 */ 879 */
797int radeon_vm_bo_update_pte(struct radeon_device *rdev, 880int radeon_vm_bo_update_pte(struct radeon_device *rdev,
798 struct radeon_vm *vm, 881 struct radeon_vm *vm,
799 struct radeon_bo *bo, 882 struct radeon_bo *bo,
800 struct ttm_mem_reg *mem) 883 struct ttm_mem_reg *mem)
801{ 884{
885 unsigned ridx = rdev->asic->vm.pt_ring_index;
886 struct radeon_ring *ring = &rdev->ring[ridx];
887 struct radeon_semaphore *sem = NULL;
802 struct radeon_bo_va *bo_va; 888 struct radeon_bo_va *bo_va;
803 unsigned ngpu_pages, i; 889 unsigned nptes, npdes, ndw;
804 uint64_t addr = 0, pfn; 890 uint64_t pe, addr;
805 uint32_t flags; 891 uint64_t pfn;
892 int r;
806 893
807 /* nothing to do if vm isn't bound */ 894 /* nothing to do if vm isn't bound */
808 if (vm->id == -1) 895 if (vm->sa_bo == NULL)
809 return 0; 896 return 0;
810 897
811 bo_va = radeon_bo_va(bo, vm); 898 bo_va = radeon_vm_bo_find(vm, bo);
812 if (bo_va == NULL) { 899 if (bo_va == NULL) {
813 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); 900 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
814 return -EINVAL; 901 return -EINVAL;
815 } 902 }
816 903
817 if (bo_va->valid && mem) 904 if (!bo_va->soffset) {
905 dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
906 bo, vm);
907 return -EINVAL;
908 }
909
910 if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
818 return 0; 911 return 0;
819 912
820 ngpu_pages = radeon_bo_ngpu_pages(bo);
821 bo_va->flags &= ~RADEON_VM_PAGE_VALID; 913 bo_va->flags &= ~RADEON_VM_PAGE_VALID;
822 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; 914 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
823 if (mem) { 915 if (mem) {
916 addr = mem->start << PAGE_SHIFT;
824 if (mem->mem_type != TTM_PL_SYSTEM) { 917 if (mem->mem_type != TTM_PL_SYSTEM) {
825 bo_va->flags |= RADEON_VM_PAGE_VALID; 918 bo_va->flags |= RADEON_VM_PAGE_VALID;
826 bo_va->valid = true; 919 bo_va->valid = true;
827 } 920 }
828 if (mem->mem_type == TTM_PL_TT) { 921 if (mem->mem_type == TTM_PL_TT) {
829 bo_va->flags |= RADEON_VM_PAGE_SYSTEM; 922 bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
923 } else {
924 addr += rdev->vm_manager.vram_base_offset;
830 } 925 }
926 } else {
927 addr = 0;
928 bo_va->valid = false;
831 } 929 }
832 pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; 930
833 flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags); 931 if (vm->fence && radeon_fence_signaled(vm->fence)) {
834 for (i = 0, addr = 0; i < ngpu_pages; i++) { 932 radeon_fence_unref(&vm->fence);
835 if (mem && bo_va->valid) { 933 }
836 addr = radeon_vm_get_addr(rdev, mem, i); 934
935 if (vm->fence && vm->fence->ring != ridx) {
936 r = radeon_semaphore_create(rdev, &sem);
937 if (r) {
938 return r;
837 } 939 }
838 rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags);
839 } 940 }
840 rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm); 941
942 /* estimate number of dw needed */
943 /* reserve space for 32-bit padding */
944 ndw = 32;
945
946 nptes = radeon_bo_ngpu_pages(bo);
947
948 pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE);
949
950 /* handle cases where a bo spans several pdes */
951 npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) -
952 (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE;
953
954 /* reserve space for one header for every 2k dwords */
955 ndw += (nptes >> 11) * 3;
956 /* reserve space for pte addresses */
957 ndw += nptes * 2;
958
959 /* reserve space for one header for every 2k dwords */
960 ndw += (npdes >> 11) * 3;
961 /* reserve space for pde addresses */
962 ndw += npdes * 2;
963
964 r = radeon_ring_lock(rdev, ring, ndw);
965 if (r) {
966 return r;
967 }
968
969 if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
970 radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
971 radeon_fence_note_sync(vm->fence, ridx);
972 }
973
974 /* update page table entries */
975 pe = vm->pd_gpu_addr;
976 pe += radeon_vm_directory_size(rdev);
977 pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8;
978
979 radeon_asic_vm_set_page(rdev, pe, addr, nptes,
980 RADEON_GPU_PAGE_SIZE, bo_va->flags);
981
982 /* update page directory entries */
983 addr = pe;
984
985 pe = vm->pd_gpu_addr;
986 pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
987
988 radeon_asic_vm_set_page(rdev, pe, addr, npdes,
989 RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID);
990
991 radeon_fence_unref(&vm->fence);
992 r = radeon_fence_emit(rdev, &vm->fence, ridx);
993 if (r) {
994 radeon_ring_unlock_undo(rdev, ring);
995 return r;
996 }
997 radeon_ring_unlock_commit(rdev, ring);
998 radeon_semaphore_free(rdev, &sem, vm->fence);
999 radeon_fence_unref(&vm->last_flush);
841 return 0; 1000 return 0;
842} 1001}
843 1002
844/* object have to be reserved */
845/** 1003/**
846 * radeon_vm_bo_rmv - remove a bo to a specific vm 1004 * radeon_vm_bo_rmv - remove a bo to a specific vm
847 * 1005 *
848 * @rdev: radeon_device pointer 1006 * @rdev: radeon_device pointer
849 * @vm: requested vm 1007 * @bo_va: requested bo_va
850 * @bo: radeon buffer object
851 * 1008 *
852 * Remove @bo from the requested vm (cayman+). 1009 * Remove @bo_va->bo from the requested vm (cayman+).
853 * Remove @bo from the list of bos associated with the vm and 1010 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
854 * remove the ptes for @bo in the page table. 1011 * remove the ptes for @bo_va in the page table.
855 * Returns 0 for success. 1012 * Returns 0 for success.
1013 *
1014 * Object have to be reserved!
856 */ 1015 */
857int radeon_vm_bo_rmv(struct radeon_device *rdev, 1016int radeon_vm_bo_rmv(struct radeon_device *rdev,
858 struct radeon_vm *vm, 1017 struct radeon_bo_va *bo_va)
859 struct radeon_bo *bo)
860{ 1018{
861 struct radeon_bo_va *bo_va;
862 int r; 1019 int r;
863 1020
864 bo_va = radeon_bo_va(bo, vm);
865 if (bo_va == NULL)
866 return 0;
867
868 /* wait for va use to end */
869 while (bo_va->fence) {
870 r = radeon_fence_wait(bo_va->fence, false);
871 if (r) {
872 DRM_ERROR("error while waiting for fence: %d\n", r);
873 }
874 if (r == -EDEADLK) {
875 r = radeon_gpu_reset(rdev);
876 if (!r)
877 continue;
878 }
879 break;
880 }
881 radeon_fence_unref(&bo_va->fence);
882
883 mutex_lock(&rdev->vm_manager.lock); 1021 mutex_lock(&rdev->vm_manager.lock);
884 mutex_lock(&vm->mutex); 1022 mutex_lock(&bo_va->vm->mutex);
885 radeon_vm_bo_update_pte(rdev, vm, bo, NULL); 1023 r = radeon_vm_bo_update_pte(rdev, bo_va->vm, bo_va->bo, NULL);
886 mutex_unlock(&rdev->vm_manager.lock); 1024 mutex_unlock(&rdev->vm_manager.lock);
887 list_del(&bo_va->vm_list); 1025 list_del(&bo_va->vm_list);
888 mutex_unlock(&vm->mutex); 1026 mutex_unlock(&bo_va->vm->mutex);
889 list_del(&bo_va->bo_list); 1027 list_del(&bo_va->bo_list);
890 1028
891 kfree(bo_va); 1029 kfree(bo_va);
892 return 0; 1030 return r;
893} 1031}
894 1032
895/** 1033/**
@@ -925,27 +1063,23 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
925 */ 1063 */
926int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) 1064int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
927{ 1065{
1066 struct radeon_bo_va *bo_va;
928 int r; 1067 int r;
929 1068
930 vm->id = -1; 1069 vm->id = 0;
931 vm->fence = NULL; 1070 vm->fence = NULL;
1071 vm->last_pfn = 0;
932 mutex_init(&vm->mutex); 1072 mutex_init(&vm->mutex);
933 INIT_LIST_HEAD(&vm->list); 1073 INIT_LIST_HEAD(&vm->list);
934 INIT_LIST_HEAD(&vm->va); 1074 INIT_LIST_HEAD(&vm->va);
935 /* SI requires equal sized PTs for all VMs, so always set 1075
936 * last_pfn to max_pfn. cayman allows variable sized
937 * pts so we can grow then as needed. Once we switch
938 * to two level pts we can unify this again.
939 */
940 if (rdev->family >= CHIP_TAHITI)
941 vm->last_pfn = rdev->vm_manager.max_pfn;
942 else
943 vm->last_pfn = 0;
944 /* map the ib pool buffer at 0 in virtual address space, set 1076 /* map the ib pool buffer at 0 in virtual address space, set
945 * read only 1077 * read only
946 */ 1078 */
947 r = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo, 0, 1079 bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo);
948 RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); 1080 r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
1081 RADEON_VM_PAGE_READABLE |
1082 RADEON_VM_PAGE_SNOOPED);
949 return r; 1083 return r;
950} 1084}
951 1085
@@ -965,7 +1099,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
965 1099
966 mutex_lock(&rdev->vm_manager.lock); 1100 mutex_lock(&rdev->vm_manager.lock);
967 mutex_lock(&vm->mutex); 1101 mutex_lock(&vm->mutex);
968 radeon_vm_unbind_locked(rdev, vm); 1102 radeon_vm_free_pt(rdev, vm);
969 mutex_unlock(&rdev->vm_manager.lock); 1103 mutex_unlock(&rdev->vm_manager.lock);
970 1104
971 /* remove all bo at this point non are busy any more because unbind 1105 /* remove all bo at this point non are busy any more because unbind
@@ -973,10 +1107,9 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
973 */ 1107 */
974 r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); 1108 r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
975 if (!r) { 1109 if (!r) {
976 bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); 1110 bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo);
977 list_del_init(&bo_va->bo_list); 1111 list_del_init(&bo_va->bo_list);
978 list_del_init(&bo_va->vm_list); 1112 list_del_init(&bo_va->vm_list);
979 radeon_fence_unref(&bo_va->fence);
980 radeon_bo_unreserve(rdev->ring_tmp_bo.bo); 1113 radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
981 kfree(bo_va); 1114 kfree(bo_va);
982 } 1115 }
@@ -988,10 +1121,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
988 r = radeon_bo_reserve(bo_va->bo, false); 1121 r = radeon_bo_reserve(bo_va->bo, false);
989 if (!r) { 1122 if (!r) {
990 list_del_init(&bo_va->bo_list); 1123 list_del_init(&bo_va->bo_list);
991 radeon_fence_unref(&bo_va->fence);
992 radeon_bo_unreserve(bo_va->bo); 1124 radeon_bo_unreserve(bo_va->bo);
993 kfree(bo_va); 1125 kfree(bo_va);
994 } 1126 }
995 } 1127 }
1128 radeon_fence_unref(&vm->fence);
1129 radeon_fence_unref(&vm->last_flush);
996 mutex_unlock(&vm->mutex); 1130 mutex_unlock(&vm->mutex);
997} 1131}
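
One closing illustration, this time of the radeon_vm_map_gart() helper added in the hunks above: for GART-backed (system) memory it looks up the bus address of the CPU page behind a GPU address and then re-applies the offset within that page, which is what keeps the translation correct when the GPU page size differs from the CPU page size. The snippet below is a hypothetical, self-contained model of that lookup; the pages_addr[] table, the 4 KiB CPU page size and the sample address are invented for the example and are not the kernel's data structures.

    /* Hypothetical model of the radeon_vm_map_gart() lookup above: translate
     * a GPU address in GART space to a bus address, keeping the sub-page
     * offset.  The table and constants are made up for the example. */
    #include <stdint.h>
    #include <stdio.h>

    #define CPU_PAGE_SHIFT 12                      /* assume 4 KiB CPU pages */
    #define CPU_PAGE_MASK  (~((uint64_t)(1 << CPU_PAGE_SHIFT) - 1))

    static uint64_t pages_addr[4] = {              /* fake dma_addr_t table  */
            0x80000000, 0x80004000, 0x90000000, 0x90001000,
    };

    static uint64_t map_gart(uint64_t addr)
    {
            uint64_t result;

            /* bus address of the CPU page backing this GPU address */
            result = pages_addr[addr >> CPU_PAGE_SHIFT];
            /* in case cpu page size != gpu page size, keep the page offset */
            result |= addr & ~CPU_PAGE_MASK;
            return result;
    }

    int main(void)
    {
            /* GPU address 0x1200 lives in CPU page 1, offset 0x200 */
            printf("0x1200 -> 0x%llx\n", (unsigned long long)map_gart(0x1200));
            return 0;
    }

For GPU address 0x1200 the model returns 0x80004200: the bus address of page index 1 in the fake table plus the 0x200 offset inside that page, mirroring the "result |= addr & (~PAGE_MASK)" step in the patched function.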