author     Daniel Vetter <daniel.vetter@ffwll.ch>   2012-03-25 13:47:35 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>   2012-03-27 07:27:21 -0400
commit     935aaa692ec5e4b7261ed7f17f962d7e978c542b (patch)
tree       8096a1c7243b7464a761efd74f5cb55a6db7326d /drivers/gpu/drm/i915/i915_gem.c
parent     692a576b9ddf8006f1559e14a5022c0a100440f1 (diff)
drm/i915: drop gtt slowpath
With the proper prefault, it's extremely unlikely that we fall back
to the gtt slowpath.
So just kill it and use the shmem_pwrite path as fallback.
To further clean up the code, move the preparatory gem calls into the
respective pwrite functions. This way the gtt_fast->shmem fallback
is much more obvious.
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
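
For orientation, the dispatch logic after this patch reduces to the condensed sketch below, pieced together from the post-patch i915_gem_pwrite_ioctl in the diff that follows (pin/domain/fence error handling and locking elided; this is a paraphrase, not the literal kernel source):

	ret = -EFAULT;
	if (obj->gtt_space &&
	    obj->cache_level == I915_CACHE_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* Fast path: pinning, the GTT-domain flush and the fence
		 * release now live inside i915_gem_gtt_pwrite_fast(). */
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
	}

	/* Anything still -EFAULT (object not eligible, or the gtt path hit a
	 * non-page-backed user pointer) falls back to the shmem path. */
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);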
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 183
1 file changed, 30 insertions(+), 153 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b253257c028e..23f1a6bcee73 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -484,30 +484,6 @@ fast_user_write(struct io_mapping *mapping,
 	return unwritten;
 }
 
-/* Here's the write path which can sleep for
- * page faults
- */
-
-static inline void
-slow_kernel_write(struct io_mapping *mapping,
-		  loff_t gtt_base, int gtt_offset,
-		  struct page *user_page, int user_offset,
-		  int length)
-{
-	char __iomem *dst_vaddr;
-	char *src_vaddr;
-
-	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
-	src_vaddr = kmap(user_page);
-
-	memcpy_toio(dst_vaddr + gtt_offset,
-		    src_vaddr + user_offset,
-		    length);
-
-	kunmap(user_page);
-	io_mapping_unmap(dst_vaddr);
-}
-
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
@@ -522,7 +498,19 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	ssize_t remain;
 	loff_t offset, page_base;
 	char __user *user_data;
-	int page_offset, page_length;
+	int page_offset, page_length, ret;
+
+	ret = i915_gem_object_pin(obj, 0, true);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret)
+		goto out_unpin;
+
+	ret = i915_gem_object_put_fence(obj);
+	if (ret)
+		goto out_unpin;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -547,112 +535,19 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	 * retry in the slow path.
 	 */
 	if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
-			    page_offset, user_data, page_length))
-		return -EFAULT;
+			    page_offset, user_data, page_length)) {
+		ret = -EFAULT;
+		goto out_unpin;
+	}
 
 		remain -= page_length;
 		user_data += page_length;
 		offset += page_length;
 	}
 
-	return 0;
-}
-
-/**
- * This is the fallback GTT pwrite path, which uses get_user_pages to pin
- * the memory and maps it using kmap_atomic for copying.
- *
- * This code resulted in x11perf -rgb10text consuming about 10% more CPU
- * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
- */
-static int
-i915_gem_gtt_pwrite_slow(struct drm_device *dev,
-			 struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_pwrite *args,
-			 struct drm_file *file)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	ssize_t remain;
-	loff_t gtt_page_base, offset;
-	loff_t first_data_page, last_data_page, num_pages;
-	loff_t pinned_pages, i;
-	struct page **user_pages;
-	struct mm_struct *mm = current->mm;
-	int gtt_page_offset, data_page_offset, data_page_index, page_length;
-	int ret;
-	uint64_t data_ptr = args->data_ptr;
-
-	remain = args->size;
-
-	/* Pin the user pages containing the data. We can't fault while
-	 * holding the struct mutex, and all of the pwrite implementations
-	 * want to hold it while dereferencing the user data.
-	 */
-	first_data_page = data_ptr / PAGE_SIZE;
-	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
-	num_pages = last_data_page - first_data_page + 1;
-
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out_unpin_pages;
-	}
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto out_unpin_pages;
-
-	ret = i915_gem_object_put_fence(obj);
-	if (ret)
-		goto out_unpin_pages;
-
-	offset = obj->gtt_offset + args->offset;
-
-	while (remain > 0) {
-		/* Operation in this page
-		 *
-		 * gtt_page_base = page offset within aperture
-		 * gtt_page_offset = offset within page in aperture
-		 * data_page_index = page number in get_user_pages return
-		 * data_page_offset = offset with data_page_index page.
-		 * page_length = bytes to copy for this page
-		 */
-		gtt_page_base = offset & PAGE_MASK;
-		gtt_page_offset = offset_in_page(offset);
-		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = offset_in_page(data_ptr);
-
-		page_length = remain;
-		if ((gtt_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - gtt_page_offset;
-		if ((data_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - data_page_offset;
-
-		slow_kernel_write(dev_priv->mm.gtt_mapping,
-				  gtt_page_base, gtt_page_offset,
-				  user_pages[data_page_index],
-				  data_page_offset,
-				  page_length);
-
-		remain -= page_length;
-		offset += page_length;
-		data_ptr += page_length;
-	}
-
-out_unpin_pages:
-	for (i = 0; i < pinned_pages; i++)
-		page_cache_release(user_pages[i]);
-	drm_free_large(user_pages);
-
+out_unpin:
+	i915_gem_object_unpin(obj);
+out:
 	return ret;
 }
 
@@ -671,6 +566,10 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int release_page;
 
+	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+	if (ret)
+		return ret;
+
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
 
@@ -814,6 +713,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
+	ret = -EFAULT;
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
 	 * it would end up going through the fenced access, and we'll get
 	 * different detiling behavior between reading and writing.
@@ -828,37 +728,14 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (obj->gtt_space &&
 	    obj->cache_level == I915_CACHE_NONE &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-		ret = i915_gem_object_pin(obj, 0, true);
-		if (ret)
-			goto out;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (ret)
-			goto out_unpin;
-
-		ret = i915_gem_object_put_fence(obj);
-		if (ret)
-			goto out_unpin;
-
 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
-		if (ret == -EFAULT)
-			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
-
-out_unpin:
-		i915_gem_object_unpin(obj);
-
-		if (ret != -EFAULT)
-			goto out;
-		/* Fall through to the shmfs paths because the gtt paths might
-		 * fail with non-page-backed user pointers (e.g. gtt mappings
-		 * when moving data between textures). */
+		/* Note that the gtt paths might fail with non-page-backed user
+		 * pointers (e.g. gtt mappings when moving data between
+		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT)
+		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 
 out:
 	drm_gem_object_unreference(&obj->base);