Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 119
1 files changed, 75 insertions, 44 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 61129e6759e..e69834341ef 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -268,7 +268,6 @@ eb_destroy(struct eb_objects *eb)
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                                    struct eb_objects *eb,
-                                   struct drm_i915_gem_exec_object2 *entry,
                                    struct drm_i915_gem_relocation_entry *reloc)
 {
         struct drm_device *dev = obj->base.dev;
@@ -411,10 +410,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 static int
 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
-                                    struct eb_objects *eb,
-                                    struct drm_i915_gem_exec_object2 *entry)
+                                    struct eb_objects *eb)
 {
         struct drm_i915_gem_relocation_entry __user *user_relocs;
+        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
         int i, ret;
 
         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
@@ -426,7 +425,7 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
                                       sizeof(reloc)))
                         return -EFAULT;
 
-                ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &reloc);
+                ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
                 if (ret)
                         return ret;
 
@@ -442,13 +441,13 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 static int
 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
                                          struct eb_objects *eb,
-                                         struct drm_i915_gem_exec_object2 *entry,
                                          struct drm_i915_gem_relocation_entry *relocs)
 {
+        const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
         int i, ret;
 
         for (i = 0; i < entry->relocation_count; i++) {
-                ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &relocs[i]);
+                ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
                 if (ret)
                         return ret;
         }
@@ -459,8 +458,7 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 static int
 i915_gem_execbuffer_relocate(struct drm_device *dev,
                              struct eb_objects *eb,
-                             struct list_head *objects,
-                             struct drm_i915_gem_exec_object2 *exec)
+                             struct list_head *objects)
 {
         struct drm_i915_gem_object *obj;
         int ret;
@@ -468,7 +466,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
         list_for_each_entry(obj, objects, exec_list) {
                 obj->base.pending_read_domains = 0;
                 obj->base.pending_write_domain = 0;
-                ret = i915_gem_execbuffer_relocate_object(obj, eb, exec++);
+                ret = i915_gem_execbuffer_relocate_object(obj, eb);
                 if (ret)
                         return ret;
         }
@@ -479,13 +477,36 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                             struct drm_file *file,
-                            struct list_head *objects,
-                            struct drm_i915_gem_exec_object2 *exec)
+                            struct list_head *objects)
 {
         struct drm_i915_gem_object *obj;
-        struct drm_i915_gem_exec_object2 *entry;
         int ret, retry;
         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+        struct list_head ordered_objects;
+
+        INIT_LIST_HEAD(&ordered_objects);
+        while (!list_empty(objects)) {
+                struct drm_i915_gem_exec_object2 *entry;
+                bool need_fence, need_mappable;
+
+                obj = list_first_entry(objects,
+                                       struct drm_i915_gem_object,
+                                       exec_list);
+                entry = obj->exec_entry;
+
+                need_fence =
+                        has_fenced_gpu_access &&
+                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+                        obj->tiling_mode != I915_TILING_NONE;
+                need_mappable =
+                        entry->relocation_count ? true : need_fence;
+
+                if (need_mappable)
+                        list_move(&obj->exec_list, &ordered_objects);
+                else
+                        list_move_tail(&obj->exec_list, &ordered_objects);
+        }
+        list_splice(&ordered_objects, objects);
 
         /* Attempt to pin all of the buffers into the GTT.
          * This is done in 3 phases:
@@ -504,14 +525,11 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                 ret = 0;
 
                 /* Unbind any ill-fitting objects or pin. */
-                entry = exec;
                 list_for_each_entry(obj, objects, exec_list) {
+                        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
                         bool need_fence, need_mappable;
-
-                        if (!obj->gtt_space) {
-                                entry++;
+                        if (!obj->gtt_space)
                                 continue;
-                        }
 
                         need_fence =
                                 has_fenced_gpu_access &&
@@ -534,8 +552,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                 }
 
                 /* Bind fresh objects */
-                entry = exec;
                 list_for_each_entry(obj, objects, exec_list) {
+                        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
                         bool need_fence;
 
                         need_fence =
@@ -570,7 +588,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                         }
 
                         entry->offset = obj->gtt_offset;
-                        entry++;
                 }
 
                 /* Decrement pin count for bound objects */
@@ -622,7 +639,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
         int i, total, ret;
 
         /* We may process another execbuffer during the unlock... */
-        while (list_empty(objects)) {
+        while (!list_empty(objects)) {
                 obj = list_first_entry(objects,
                                        struct drm_i915_gem_object,
                                        exec_list);
@@ -665,7 +682,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
         }
 
         /* reacquire the objects */
-        INIT_LIST_HEAD(objects);
         eb_reset(eb);
         for (i = 0; i < count; i++) {
                 struct drm_i915_gem_object *obj;
@@ -681,10 +697,11 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 
                 list_add_tail(&obj->exec_list, objects);
                 obj->exec_handle = exec[i].handle;
+                obj->exec_entry = &exec[i];
                 eb_add_object(eb, obj);
         }
 
-        ret = i915_gem_execbuffer_reserve(ring, file, objects, exec);
+        ret = i915_gem_execbuffer_reserve(ring, file, objects);
         if (ret)
                 goto err;
 
@@ -693,7 +710,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                 obj->base.pending_read_domains = 0;
                 obj->base.pending_write_domain = 0;
                 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
-                                                               exec,
                                                                reloc + total);
                 if (ret)
                         goto err;
@@ -713,25 +729,34 @@ err:
         return ret;
 }
 
-static void
+static int
 i915_gem_execbuffer_flush(struct drm_device *dev,
                           uint32_t invalidate_domains,
                           uint32_t flush_domains,
                           uint32_t flush_rings)
 {
         drm_i915_private_t *dev_priv = dev->dev_private;
-        int i;
+        int i, ret;
 
         if (flush_domains & I915_GEM_DOMAIN_CPU)
                 intel_gtt_chipset_flush();
 
+        if (flush_domains & I915_GEM_DOMAIN_GTT)
+                wmb();
+
         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
                 for (i = 0; i < I915_NUM_RINGS; i++)
-                        if (flush_rings & (1 << i))
-                                i915_gem_flush_ring(dev, &dev_priv->ring[i],
-                                                    invalidate_domains,
-                                                    flush_domains);
+                        if (flush_rings & (1 << i)) {
+                                ret = i915_gem_flush_ring(dev,
+                                                          &dev_priv->ring[i],
+                                                          invalidate_domains,
+                                                          flush_domains);
+                                if (ret)
+                                        return ret;
+                        }
         }
+
+        return 0;
 }
 
 static int
@@ -795,10 +820,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                          cd.invalidate_domains,
                          cd.flush_domains);
 #endif
-                i915_gem_execbuffer_flush(ring->dev,
-                                          cd.invalidate_domains,
-                                          cd.flush_domains,
-                                          cd.flush_rings);
+                ret = i915_gem_execbuffer_flush(ring->dev,
+                                                cd.invalidate_domains,
+                                                cd.flush_domains,
+                                                cd.flush_rings);
+                if (ret)
+                        return ret;
         }
 
         list_for_each_entry(obj, objects, exec_list) {
@@ -921,7 +948,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                                     struct intel_ring_buffer *ring)
 {
         struct drm_i915_gem_request *request;
-        u32 flush_domains;
+        u32 invalidate;
 
         /*
          * Ensure that the commands in the batch buffer are
@@ -929,11 +956,13 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
          *
          * The sampler always gets flushed on i965 (sigh).
          */
-        flush_domains = 0;
+        invalidate = I915_GEM_DOMAIN_COMMAND;
         if (INTEL_INFO(dev)->gen >= 4)
-                flush_domains |= I915_GEM_DOMAIN_SAMPLER;
-
-        ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
+                invalidate |= I915_GEM_DOMAIN_SAMPLER;
+        if (ring->flush(ring, invalidate, 0)) {
+                i915_gem_next_request_seqno(dev, ring);
+                return;
+        }
 
         /* Add a breadcrumb for the completion of the batch buffer */
         request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -1098,16 +1127,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
                 list_add_tail(&obj->exec_list, &objects);
                 obj->exec_handle = exec[i].handle;
+                obj->exec_entry = &exec[i];
                 eb_add_object(eb, obj);
         }
 
+        /* take note of the batch buffer before we might reorder the lists */
+        batch_obj = list_entry(objects.prev,
+                               struct drm_i915_gem_object,
+                               exec_list);
+
         /* Move the objects en-masse into the GTT, evicting if necessary. */
-        ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec);
+        ret = i915_gem_execbuffer_reserve(ring, file, &objects);
         if (ret)
                 goto err;
 
         /* The objects are in their final locations, apply the relocations. */
-        ret = i915_gem_execbuffer_relocate(dev, eb, &objects, exec);
+        ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
         if (ret) {
                 if (ret == -EFAULT) {
                         ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
@@ -1121,9 +1156,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         }
 
         /* Set the pending read domains for the batch buffer to COMMAND */
-        batch_obj = list_entry(objects.prev,
-                               struct drm_i915_gem_object,
-                               exec_list);
         if (batch_obj->base.pending_write_domain) {
                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
                 ret = -EINVAL;
@@ -1340,4 +1372,3 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
         drm_free_large(exec2_list);
         return ret;
 }
-