author    Chris Wilson <chris@chris-wilson.co.uk>  2010-11-25 14:32:06 -0500
committer Chris Wilson <chris@chris-wilson.co.uk>  2010-11-25 16:19:26 -0500
commit    432e58edc9de1d9c3d6a7b444b3c455b8f209a7d
tree      8dd0b0edb78ddf1b59e6f8e5cd8939ff6ed961ec
parent    54cf91dc4e51fd5070a9a2346377493cc38a1ca9
drm/i915: Avoid allocation for execbuffer object list

Besides the minimal improvement in reducing the execbuffer overhead, the
real benefit is clarifying a few routines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
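The change replaces the per-call array of object pointers (allocated with drm_malloc_ab and indexed by i) with an intrusive list: each object carries an embedded struct list_head, exec_list, which doubles as the link used by the eviction code, so building the per-execbuffer set is a list_add_tail() and the old in_execbuffer duplicate-check bit becomes a simple list_empty() test. Below is a minimal sketch of that idiom using the kernel's <linux/list.h>; the struct and helper names are illustrative stand-ins, not the driver's actual code:

	#include <linux/types.h>
	#include <linux/list.h>
	#include <linux/slab.h>

	/* Simplified stand-in for drm_i915_gem_object: the object embeds its
	 * own list link, so no temporary pointer array is ever allocated. */
	struct sketch_obj {
		struct list_head exec_list;	/* link into the per-call object list */
		u32 handle;
	};

	static struct sketch_obj *sketch_obj_create(u32 handle)
	{
		struct sketch_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

		if (obj) {
			/* empty list node == "not on any execbuffer list" */
			INIT_LIST_HEAD(&obj->exec_list);
			obj->handle = handle;
		}
		return obj;
	}

	/* Queue an object for one execbuffer call: O(1), no allocation. */
	static int sketch_queue(struct list_head *objects, struct sketch_obj *obj)
	{
		if (!list_empty(&obj->exec_list))
			return -EINVAL;	/* already queued: replaces the in_execbuffer bit */

		list_add_tail(&obj->exec_list, objects);
		return 0;
	}

	/* Unwind: list_del_init() restores the "not queued" state for reuse. */
	static void sketch_release_all(struct list_head *objects)
	{
		while (!list_empty(objects)) {
			struct sketch_obj *obj = list_first_entry(objects,
								  struct sketch_obj,
								  exec_list);
			list_del_init(&obj->exec_list);
		}
	}

list_add_tail() keeps the objects in the order they were named in the ioctl, which the patch relies on when it takes the last list entry as the batch buffer.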
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h             |  10
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             |   1
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_evict.c       |  14
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  | 403
4 files changed, 199 insertions, 229 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c10b645dde9..e7c4108c94cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -712,8 +712,8 @@ struct drm_i915_gem_object {
 	struct list_head mm_list;
 	/** This object's place on GPU write list */
 	struct list_head gpu_write_list;
-	/** This object's place on eviction list */
-	struct list_head evict_list;
+	/** This object's place in the batchbuffer or on the eviction list */
+	struct list_head exec_list;
 
 	/**
 	 * This is set if the object is on the active or flushing lists
@@ -738,12 +738,6 @@ struct drm_i915_gem_object {
 	signed int fence_reg : 5;
 
 	/**
-	 * Used for checking the object doesn't appear more than once
-	 * in an execbuffer object list.
-	 */
-	unsigned int in_execbuffer : 1;
-
-	/**
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv : 2;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b30c6c167048..d9d81f94a4b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3399,6 +3399,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->gtt_list);
 	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->exec_list);
 	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
 	/* Avoid an unnecessary call to unbind on the first bind. */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 03e15d37b550..78b8cf90c922 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -34,7 +34,7 @@
 static bool
 mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
 {
-	list_add(&obj->evict_list, unwind);
+	list_add(&obj->exec_list, unwind);
 	drm_gem_object_reference(&obj->base);
 	return drm_mm_scan_add_block(obj->gtt_space);
 }
@@ -127,7 +127,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 	}
 
 	/* Nothing found, clean up and bail out! */
-	list_for_each_entry(obj, &unwind_list, evict_list) {
+	list_for_each_entry(obj, &unwind_list, exec_list) {
 		ret = drm_mm_scan_remove_block(obj->gtt_space);
 		BUG_ON(ret);
 		drm_gem_object_unreference(&obj->base);
@@ -146,12 +146,12 @@ found:
 	while (!list_empty(&unwind_list)) {
 		obj = list_first_entry(&unwind_list,
 				       struct drm_i915_gem_object,
-				       evict_list);
+				       exec_list);
 		if (drm_mm_scan_remove_block(obj->gtt_space)) {
-			list_move(&obj->evict_list, &eviction_list);
+			list_move(&obj->exec_list, &eviction_list);
 			continue;
 		}
-		list_del(&obj->evict_list);
+		list_del_init(&obj->exec_list);
 		drm_gem_object_unreference(&obj->base);
 	}
 
@@ -159,10 +159,10 @@ found:
 	while (!list_empty(&eviction_list)) {
 		obj = list_first_entry(&eviction_list,
 				       struct drm_i915_gem_object,
-				       evict_list);
+				       exec_list);
 		if (ret == 0)
 			ret = i915_gem_object_unbind(obj);
-		list_del(&obj->evict_list);
+		list_del_init(&obj->exec_list);
 		drm_gem_object_unreference(&obj->base);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bdc613b91af8..d54070111f9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -406,18 +406,16 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 static int
 i915_gem_execbuffer_relocate(struct drm_device *dev,
 			     struct drm_file *file,
-			     struct drm_i915_gem_object **object_list,
-			     struct drm_i915_gem_exec_object2 *exec_list,
-			     int count)
+			     struct list_head *objects,
+			     struct drm_i915_gem_exec_object2 *exec)
 {
-	int i, ret;
+	struct drm_i915_gem_object *obj;
+	int ret;
 
-	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_object *obj = object_list[i];
+	list_for_each_entry(obj, objects, exec_list) {
 		obj->base.pending_read_domains = 0;
 		obj->base.pending_write_domain = 0;
-		ret = i915_gem_execbuffer_relocate_object(obj, file,
-							  &exec_list[i]);
+		ret = i915_gem_execbuffer_relocate_object(obj, file, exec++);
 		if (ret)
 			return ret;
 	}
@@ -428,11 +426,12 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 static int
 i915_gem_execbuffer_reserve(struct drm_device *dev,
 			    struct drm_file *file,
-			    struct drm_i915_gem_object **object_list,
-			    struct drm_i915_gem_exec_object2 *exec_list,
-			    int count)
+			    struct list_head *objects,
+			    struct drm_i915_gem_exec_object2 *exec)
 {
-	int ret, i, retry;
+	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_exec_object2 *entry;
+	int ret, retry;
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -451,13 +450,14 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
 		ret = 0;
 
 		/* Unbind any ill-fitting objects or pin. */
-		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_object *obj = object_list[i];
-			struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
+		entry = exec;
+		list_for_each_entry(obj, objects, exec_list) {
 			bool need_fence, need_mappable;
 
-			if (!obj->gtt_space)
+			if (!obj->gtt_space) {
+				entry++;
 				continue;
+			}
 
 			need_fence =
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
@@ -472,16 +472,15 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
 			ret = i915_gem_object_pin(obj,
 						  entry->alignment,
 						  need_mappable);
-			if (ret) {
-				count = i;
+			if (ret)
 				goto err;
-			}
+
+			entry++;
 		}
 
 		/* Bind fresh objects */
-		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
-			struct drm_i915_gem_object *obj = object_list[i];
+		entry = exec;
+		list_for_each_entry(obj, objects, exec_list) {
 			bool need_fence;
 
 			need_fence =
@@ -504,15 +503,15 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
 				if (ret)
 					break;
 
-				obj->pending_fenced_gpu_access = true;
 			}
+			obj->pending_fenced_gpu_access = need_fence;
 
 			entry->offset = obj->gtt_offset;
+			entry++;
 		}
 
-err:		/* Decrement pin count for bound objects */
-		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_object *obj = object_list[i];
+		/* Decrement pin count for bound objects */
+		list_for_each_entry(obj, objects, exec_list) {
 			if (obj->gtt_space)
 				i915_gem_object_unpin(obj);
 		}
@@ -529,26 +528,36 @@ err:	/* Decrement pin count for bound objects */
 
 		retry++;
 	} while (1);
+
+err:
+	while (objects != &obj->exec_list) {
+		if (obj->gtt_space)
+			i915_gem_object_unpin(obj);
+
+		obj = list_entry(obj->exec_list.prev,
+				 struct drm_i915_gem_object,
+				 exec_list);
+	}
+
+	return ret;
 }
 
 static int
 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 				  struct drm_file *file,
-				  struct drm_i915_gem_object **object_list,
-				  struct drm_i915_gem_exec_object2 *exec_list,
+				  struct list_head *objects,
+				  struct drm_i915_gem_exec_object2 *exec,
 				  int count)
 {
 	struct drm_i915_gem_relocation_entry *reloc;
+	struct drm_i915_gem_object *obj;
 	int i, total, ret;
 
-	for (i = 0; i < count; i++)
-		object_list[i]->in_execbuffer = false;
-
 	mutex_unlock(&dev->struct_mutex);
 
 	total = 0;
 	for (i = 0; i < count; i++)
-		total += exec_list[i].relocation_count;
+		total += exec[i].relocation_count;
 
 	reloc = drm_malloc_ab(total, sizeof(*reloc));
 	if (reloc == NULL) {
@@ -560,17 +569,16 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	for (i = 0; i < count; i++) {
 		struct drm_i915_gem_relocation_entry __user *user_relocs;
 
-		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
+		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
 
 		if (copy_from_user(reloc+total, user_relocs,
-				   exec_list[i].relocation_count *
-				   sizeof(*reloc))) {
+				   exec[i].relocation_count * sizeof(*reloc))) {
 			ret = -EFAULT;
 			mutex_lock(&dev->struct_mutex);
 			goto err;
 		}
 
-		total += exec_list[i].relocation_count;
+		total += exec[i].relocation_count;
 	}
 
 	ret = i915_mutex_lock_interruptible(dev);
@@ -579,24 +587,22 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 		goto err;
 	}
 
-	ret = i915_gem_execbuffer_reserve(dev, file,
-					  object_list, exec_list,
-					  count);
+	ret = i915_gem_execbuffer_reserve(dev, file, objects, exec);
 	if (ret)
 		goto err;
 
 	total = 0;
-	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_object *obj = object_list[i];
+	list_for_each_entry(obj, objects, exec_list) {
 		obj->base.pending_read_domains = 0;
 		obj->base.pending_write_domain = 0;
 		ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
-							       &exec_list[i],
+							       exec,
 							       reloc + total);
 		if (ret)
 			goto err;
 
-		total += exec_list[i].relocation_count;
+		total += exec->relocation_count;
+		exec++;
 	}
 
 	/* Leave the user relocations as are, this is the painfully slow path,
@@ -636,20 +642,18 @@ i915_gem_execbuffer_flush(struct drm_device *dev,
 
 
 static int
-i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
-				struct drm_file *file,
-				struct intel_ring_buffer *ring,
-				struct drm_i915_gem_object **objects,
-				int count)
+i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
+				struct list_head *objects)
 {
+	struct drm_i915_gem_object *obj;
 	struct change_domains cd;
-	int ret, i;
+	int ret;
 
 	cd.invalidate_domains = 0;
 	cd.flush_domains = 0;
 	cd.flush_rings = 0;
-	for (i = 0; i < count; i++)
-		i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
+	list_for_each_entry(obj, objects, exec_list)
+		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 
 	if (cd.invalidate_domains | cd.flush_domains) {
 #if WATCH_EXEC
@@ -658,14 +662,13 @@ i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
 			 cd.invalidate_domains,
 			 cd.flush_domains);
 #endif
-		i915_gem_execbuffer_flush(dev,
+		i915_gem_execbuffer_flush(ring->dev,
 					  cd.invalidate_domains,
 					  cd.flush_domains,
 					  cd.flush_rings);
 	}
 
-	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_object *obj = objects[i];
+	list_for_each_entry(obj, objects, exec_list) {
 		/* XXX replace with semaphores */
 		if (obj->ring && ring != obj->ring) {
 			ret = i915_gem_object_wait_rendering(obj, true);
@@ -677,22 +680,10 @@ i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
 	return 0;
 }
 
-static int
-i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
-			  uint64_t exec_offset)
+static bool
+i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 {
-	uint32_t exec_start, exec_len;
-
-	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
-	exec_len = (uint32_t) exec->batch_len;
-
-	if ((exec_start | exec_len) & 0x7)
-		return -EINVAL;
-
-	if (!exec_start)
-		return -EINVAL;
-
-	return 0;
+	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 }
 
 static int
@@ -726,36 +717,119 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 	return 0;
 }
 
+static int
+i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
+				   struct list_head *objects)
+{
+	struct drm_i915_gem_object *obj;
+	int flips;
+
+	/* Check for any pending flips. As we only maintain a flip queue depth
+	 * of 1, we can simply insert a WAIT for the next display flip prior
+	 * to executing the batch and avoid stalling the CPU.
+	 */
+	flips = 0;
+	list_for_each_entry(obj, objects, exec_list) {
+		if (obj->base.write_domain)
+			flips |= atomic_read(&obj->pending_flip);
+	}
+	if (flips) {
+		int plane, flip_mask, ret;
+
+		for (plane = 0; flips >> plane; plane++) {
+			if (((flips >> plane) & 1) == 0)
+				continue;
+
+			if (plane)
+				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+			else
+				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+			ret = intel_ring_begin(ring, 2);
+			if (ret)
+				return ret;
+
+			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_advance(ring);
+		}
+	}
+
+	return 0;
+}
+
+static void
+i915_gem_execbuffer_move_to_active(struct list_head *objects,
+				   struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_object *obj;
+
+	list_for_each_entry(obj, objects, exec_list) {
+		obj->base.read_domains = obj->base.pending_read_domains;
+		obj->base.write_domain = obj->base.pending_write_domain;
+		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
+
+		i915_gem_object_move_to_active(obj, ring);
+		if (obj->base.write_domain) {
+			obj->dirty = 1;
+			list_move_tail(&obj->gpu_write_list,
+				       &ring->gpu_write_list);
+			intel_mark_busy(ring->dev, obj);
+		}
+
+		trace_i915_gem_object_change_domain(obj,
+						    obj->base.read_domains,
+						    obj->base.write_domain);
+	}
+}
+
 static void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
+				    struct drm_file *file,
 				    struct intel_ring_buffer *ring)
 {
-	uint32_t flush_domains = 0;
+	struct drm_i915_gem_request *request;
+	u32 flush_domains;
 
-	/* The sampler always gets flushed on i965 (sigh) */
+	/*
+	 * Ensure that the commands in the batch buffer are
+	 * finished before the interrupt fires.
+	 *
+	 * The sampler always gets flushed on i965 (sigh).
+	 */
+	flush_domains = 0;
 	if (INTEL_INFO(dev)->gen >= 4)
 		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
 
 	ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
-}
 
+	/* Add a breadcrumb for the completion of the batch buffer */
+	request = kzalloc(sizeof(*request), GFP_KERNEL);
+	if (request == NULL || i915_add_request(dev, file, request, ring)) {
+		i915_gem_next_request_seqno(dev, ring);
+		kfree(request);
+	}
+}
 
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec_list)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object **object_list = NULL;
+	struct list_head objects;
 	struct drm_i915_gem_object *batch_obj;
 	struct drm_clip_rect *cliprects = NULL;
-	struct drm_i915_gem_request *request = NULL;
 	struct intel_ring_buffer *ring;
-	int ret, i, flips;
-	uint64_t exec_offset;
+	int ret, i;
 
-	ret = validate_exec_list(exec_list, args->buffer_count);
+	if (!i915_gem_check_execbuffer(args)) {
+		DRM_ERROR("execbuf with invalid offset/length\n");
+		return -EINVAL;
+	}
+
+	ret = validate_exec_list(exec, args->buffer_count);
 	if (ret)
 		return ret;
 
@@ -792,40 +866,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
 		return -EINVAL;
 	}
-	object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
-	if (object_list == NULL) {
-		DRM_ERROR("Failed to allocate object list for %d buffers\n",
-			  args->buffer_count);
-		ret = -ENOMEM;
-		goto pre_mutex_err;
-	}
 
 	if (args->num_cliprects != 0) {
-		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
+		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
 				    GFP_KERNEL);
 		if (cliprects == NULL) {
 			ret = -ENOMEM;
 			goto pre_mutex_err;
 		}
 
-		ret = copy_from_user(cliprects,
-				     (struct drm_clip_rect __user *)
-				     (uintptr_t) args->cliprects_ptr,
-				     sizeof(*cliprects) * args->num_cliprects);
-		if (ret != 0) {
-			DRM_ERROR("copy %d cliprects failed: %d\n",
-				  args->num_cliprects, ret);
+		if (copy_from_user(cliprects,
+				   (struct drm_clip_rect __user *)(uintptr_t)
+				   args->cliprects_ptr,
+				   sizeof(*cliprects)*args->num_cliprects)) {
 			ret = -EFAULT;
 			goto pre_mutex_err;
 		}
 	}
 
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL) {
-		ret = -ENOMEM;
-		goto pre_mutex_err;
-	}
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		goto pre_mutex_err;
@@ -837,49 +895,41 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Look up object handles */
+	INIT_LIST_HEAD(&objects);
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_i915_gem_object *obj;
 
-		obj = to_intel_bo (drm_gem_object_lookup(dev, file,
-							 exec_list[i].handle));
+		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
+							exec[i].handle));
 		if (obj == NULL) {
 			DRM_ERROR("Invalid object handle %d at index %d\n",
-				  exec_list[i].handle, i);
+				  exec[i].handle, i);
 			/* prevent error path from reading uninitialized data */
-			args->buffer_count = i;
 			ret = -ENOENT;
 			goto err;
 		}
-		object_list[i] = obj;
 
-		if (obj->in_execbuffer) {
-			DRM_ERROR("Object %p appears more than once in object list\n",
-				  obj);
-			/* prevent error path from reading uninitialized data */
-			args->buffer_count = i + 1;
+		if (!list_empty(&obj->exec_list)) {
+			DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
+				  obj, exec[i].handle, i);
 			ret = -EINVAL;
 			goto err;
 		}
-		obj->in_execbuffer = true;
-		obj->pending_fenced_gpu_access = false;
+
+		list_add_tail(&obj->exec_list, &objects);
 	}
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
-	ret = i915_gem_execbuffer_reserve(dev, file,
-					  object_list, exec_list,
-					  args->buffer_count);
+	ret = i915_gem_execbuffer_reserve(dev, file, &objects, exec);
 	if (ret)
 		goto err;
 
 	/* The objects are in their final locations, apply the relocations. */
-	ret = i915_gem_execbuffer_relocate(dev, file,
-					   object_list, exec_list,
-					   args->buffer_count);
+	ret = i915_gem_execbuffer_relocate(dev, file, &objects, exec);
 	if (ret) {
 		if (ret == -EFAULT) {
 			ret = i915_gem_execbuffer_relocate_slow(dev, file,
-								object_list,
-								exec_list,
+								&objects, exec,
 								args->buffer_count);
 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 		}
@@ -888,7 +938,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	batch_obj = object_list[args->buffer_count-1];
+	batch_obj = list_entry(objects.prev,
+			       struct drm_i915_gem_object,
+			       exec_list);
 	if (batch_obj->base.pending_write_domain) {
 		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
@@ -896,115 +948,38 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
-	/* Sanity check the batch buffer */
-	exec_offset = batch_obj->gtt_offset;
-	ret = i915_gem_check_execbuffer(args, exec_offset);
-	if (ret != 0) {
-		DRM_ERROR("execbuf with invalid offset/length\n");
+	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
+	if (ret)
 		goto err;
-	}
 
-	ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
-					      object_list, args->buffer_count);
+	ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
 	if (ret)
 		goto err;
 
-#if WATCH_COHERENCY
-	for (i = 0; i < args->buffer_count; i++) {
-		i915_gem_object_check_coherency(object_list[i],
-						exec_list[i].handle);
-	}
-#endif
-
-#if WATCH_EXEC
-	i915_gem_dump_object(batch_obj,
-			     args->batch_len,
-			     __func__,
-			     ~0);
-#endif
-
-	/* Check for any pending flips. As we only maintain a flip queue depth
-	 * of 1, we can simply insert a WAIT for the next display flip prior
-	 * to executing the batch and avoid stalling the CPU.
-	 */
-	flips = 0;
-	for (i = 0; i < args->buffer_count; i++) {
-		if (object_list[i]->base.write_domain)
-			flips |= atomic_read(&object_list[i]->pending_flip);
-	}
-	if (flips) {
-		int plane, flip_mask;
-
-		for (plane = 0; flips >> plane; plane++) {
-			if (((flips >> plane) & 1) == 0)
-				continue;
-
-			if (plane)
-				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-			else
-				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-			ret = intel_ring_begin(ring, 2);
-			if (ret)
-				goto err;
-
-			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-			intel_ring_emit(ring, MI_NOOP);
-			intel_ring_advance(ring);
-		}
-	}
-
-	/* Exec the batchbuffer */
-	ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
-	if (ret) {
-		DRM_ERROR("dispatch failed %d\n", ret);
+	ret = ring->dispatch_execbuffer(ring,
+					args, cliprects,
+					batch_obj->gtt_offset);
+	if (ret)
 		goto err;
-	}
 
-	for (i = 0; i < args->buffer_count; i++) {
-		struct drm_i915_gem_object *obj = object_list[i];
-
-		obj->base.read_domains = obj->base.pending_read_domains;
-		obj->base.write_domain = obj->base.pending_write_domain;
-		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
-
-		i915_gem_object_move_to_active(obj, ring);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			list_move_tail(&obj->gpu_write_list,
-				       &ring->gpu_write_list);
-			intel_mark_busy(dev, obj);
-		}
-
-		trace_i915_gem_object_change_domain(obj,
-						    obj->base.read_domains,
-						    obj->base.write_domain);
-	}
-
-	/*
-	 * Ensure that the commands in the batch buffer are
-	 * finished before the interrupt fires
-	 */
-	i915_gem_execbuffer_retire_commands(dev, ring);
-
-	if (i915_add_request(dev, file, request, ring))
-		i915_gem_next_request_seqno(dev, ring);
-	else
-		request = NULL;
+	i915_gem_execbuffer_move_to_active(&objects, ring);
+	i915_gem_execbuffer_retire_commands(dev, file, ring);
 
 err:
-	for (i = 0; i < args->buffer_count; i++) {
-		object_list[i]->in_execbuffer = false;
-		drm_gem_object_unreference(&object_list[i]->base);
+	while (!list_empty(&objects)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&objects,
+				       struct drm_i915_gem_object,
+				       exec_list);
+		list_del_init(&obj->exec_list);
+		drm_gem_object_unreference(&obj->base);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
 
 pre_mutex_err:
-	drm_free_large(object_list);
 	kfree(cliprects);
-	kfree(request);
-
 	return ret;
 }
 