author     Linus Torvalds <torvalds@linux-foundation.org>   2012-10-04 02:29:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-10-04 02:29:23 -0400
commit     612a9aab56a93533e76e3ad91642db7033e03b69 (patch)
tree       8402096973f67af941f9392f7da06cca03e0b58a /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent     3a494318b14b1bc0f59d2d6ce84c505c74d82d2a (diff)
parent     268d28371cd326be4dfcd7eba5917bf4b9d30c8f (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm merge (part 1) from Dave Airlie:
 "So first of all my tree and uapi stuff has a conflict mess, it's my
  fault as the nouveau stuff didn't hit -next as we were trying to
  rebase regressions out of it before we merged.

  Highlights:

   - SH mobile modesetting driver and associated helpers

   - some DRM core documentation

   - i915 modesetting rework, haswell hdmi, haswell and vlv fixes,
     write combined pte writing, ilk rc6 support

   - nouveau: major driver rework into a hw core driver, makes features
     like SLI a lot saner to implement

   - psb: add eDP/DP support for Cedarview

   - radeon: 2 layer page tables, async VM pte updates, better PLL
     selection for > 2 screens, better ACPI interactions

  The rest is a general grab bag of fixes.

  So why part 1? Well, I have the exynos pull req which came in a bit
  late but was waiting for me to do something they shouldn't have, and
  it looks fairly safe, and David Howells has some more header cleanups
  he'd like me to pull; those seem like a good idea, but I'd like to get
  this merge out of the way so -next doesn't get blocked."

Tons of conflicts, mostly due to silly include line changes, but mostly
mindless. A few other small semantic conflicts too, noted from Dave's
pre-merged branch.

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (447 commits)
  drm/nv98/crypt: fix fuc build with latest envyas
  drm/nouveau/devinit: fixup various issues with subdev ctor/init ordering
  drm/nv41/vm: fix and enable use of "real" pciegart
  drm/nv44/vm: fix and enable use of "real" pciegart
  drm/nv04/dmaobj: fixup vm target handling in preparation for nv4x pcie
  drm/nouveau: store supported dma mask in vmmgr
  drm/nvc0/ibus: initial implementation of subdev
  drm/nouveau/therm: add support for fan-control modes
  drm/nouveau/hwmon: rename pwm0* to pmw1* to follow hwmon's rules
  drm/nouveau/therm: calculate the pwm divisor on nv50+
  drm/nouveau/fan: rewrite the fan tachometer driver to get more precision, faster
  drm/nouveau/therm: move thermal-related functions to the therm subdev
  drm/nouveau/bios: parse the pwm divisor from the perf table
  drm/nouveau/therm: use the EXTDEV table to detect i2c monitoring devices
  drm/nouveau/therm: rework thermal table parsing
  drm/nouveau/gpio: expose the PWM/TOGGLE parameter found in the gpio vbios table
  drm/nouveau: fix pm initialization order
  drm/nouveau/bios: check that fixed tvdac gpio data is valid before using it
  drm/nouveau: log channel debug/error messages from client object rather than drm client
  drm/nouveau: have drm debugging macros build on top of core macros
  ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  391
1 file changed, 82 insertions, 309 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8dd9a6f47db8..3eea143749f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,180 +33,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-        uint32_t invalidate_domains;
-        uint32_t flush_domains;
-        uint32_t flush_rings;
-        uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *      1. Allocated
- *      2. Written by CPU
- *      3. Mapped to GTT
- *      4. Read by GPU
- *      5. Unmapped from GTT
- *      6. Freed
- *
- *      Let's take these a step at a time
- *
- *      1. Allocated
- *              Pages allocated from the kernel may still have
- *              cache contents, so we set them to (CPU, CPU) always.
- *      2. Written by CPU (using pwrite)
- *              The pwrite function calls set_domain (CPU, CPU) and
- *              this function does nothing (as nothing changes)
- *      3. Mapped by GTT
- *              This function asserts that the object is not
- *              currently in any GPU-based read or write domains
- *      4. Read by GPU
- *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *              As write_domain is zero, this function adds in the
- *              current read domains (CPU+COMMAND, 0).
- *              flush_domains is set to CPU.
- *              invalidate_domains is set to COMMAND
- *              clflush is run to get data out of the CPU caches
- *              then i915_dev_set_domain calls i915_gem_flush to
- *              emit an MI_FLUSH and drm_agp_chipset_flush
- *      5. Unmapped from GTT
- *              i915_gem_object_unbind calls set_domain (CPU, CPU)
- *              flush_domains and invalidate_domains end up both zero
- *              so no flushing/invalidating happens
- *      6. Freed
- *              yay, done
- *
- * Case 2: The shared render buffer
- *
- *      1. Allocated
- *      2. Mapped to GTT
- *      3. Read/written by GPU
- *      4. set_domain to (CPU,CPU)
- *      5. Read/written by CPU
- *      6. Read/written by GPU
- *
- *      1. Allocated
- *              Same as last example, (CPU, CPU)
- *      2. Mapped to GTT
- *              Nothing changes (assertions find that it is not in the GPU)
- *      3. Read/written by GPU
- *              execbuffer calls set_domain (RENDER, RENDER)
- *              flush_domains gets CPU
- *              invalidate_domains gets GPU
- *              clflush (obj)
- *              MI_FLUSH and drm_agp_chipset_flush
- *      4. set_domain (CPU, CPU)
- *              flush_domains gets GPU
- *              invalidate_domains gets CPU
- *              wait_rendering (obj) to make sure all drawing is complete.
- *              This will include an MI_FLUSH to get the data from GPU
- *              to memory
- *              clflush (obj) to invalidate the CPU cache
- *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *      5. Read/written by CPU
- *              cache lines are loaded and dirtied
- *      6. Read written by GPU
- *              Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *      1. Allocated
- *      2. Written by CPU
- *      3. Read by GPU
- *      4. Updated (written) by CPU again
- *      5. Read by GPU
- *
- *      1. Allocated
- *              (CPU, CPU)
- *      2. Written by CPU
- *              (CPU, CPU)
- *      3. Read by GPU
- *              (CPU+RENDER, 0)
- *              flush_domains = CPU
- *              invalidate_domains = RENDER
- *              clflush (obj)
- *              MI_FLUSH
- *              drm_agp_chipset_flush
- *      4. Updated (written) by CPU again
- *              (CPU, CPU)
- *              flush_domains = 0 (no previous write domain)
- *              invalidate_domains = 0 (no new read domains)
- *      5. Read by GPU
- *              (CPU+RENDER, 0)
- *              flush_domains = CPU
- *              invalidate_domains = RENDER
- *              clflush (obj)
- *              MI_FLUSH
- *              drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-                                  struct intel_ring_buffer *ring,
-                                  struct change_domains *cd)
-{
-        uint32_t invalidate_domains = 0, flush_domains = 0;
-
-        /*
-         * If the object isn't moving to a new write domain,
-         * let the object stay in multiple read domains
-         */
-        if (obj->base.pending_write_domain == 0)
-                obj->base.pending_read_domains |= obj->base.read_domains;
-
-        /*
-         * Flush the current write domain if
-         * the new read domains don't match. Invalidate
-         * any read domains which differ from the old
-         * write domain
-         */
-        if (obj->base.write_domain &&
-            (((obj->base.write_domain != obj->base.pending_read_domains ||
-               obj->ring != ring)) ||
-             (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-                flush_domains |= obj->base.write_domain;
-                invalidate_domains |=
-                        obj->base.pending_read_domains & ~obj->base.write_domain;
-        }
-        /*
-         * Invalidate any read caches which may have
-         * stale data. That is, any new read domains.
-         */
-        invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-                i915_gem_clflush_object(obj);
-
-        if (obj->base.pending_write_domain)
-                cd->flips |= atomic_read(&obj->pending_flip);
-
-        /* The actual obj->write_domain will be updated with
-         * pending_write_domain after we emit the accumulated flush for all
-         * of our domain changes in execbuffers (which clears objects'
-         * write_domains). So if we have a current write domain that we
-         * aren't changing, set pending_write_domain to that.
-         */
-        if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-                obj->base.pending_write_domain = obj->base.write_domain;
-
-        cd->invalidate_domains |= invalidate_domains;
-        cd->flush_domains |= flush_domains;
-        if (flush_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(obj->ring);
-        if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
         int and;
         struct hlist_head buckets[0];
@@ -217,6 +43,7 @@ eb_create(int size)
 {
         struct eb_objects *eb;
         int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+        BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
         while (count > size)
                 count >>= 1;
         eb = kzalloc(count*sizeof(struct hlist_head) +
@@ -268,6 +95,7 @@ eb_destroy(struct eb_objects *eb)
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
         return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+                !obj->map_and_fenceable ||
                 obj->cache_level != I915_CACHE_NONE);
 }
 
@@ -382,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                 if (ret)
                         return ret;
 
-                vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
+                vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+                                                             reloc->offset >> PAGE_SHIFT));
                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
                 kunmap_atomic(vaddr);
         } else {
@@ -503,7 +332,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
         return ret;
 }
 
-#define __EXEC_OBJECT_HAS_FENCE (1<<31)
+#define __EXEC_OBJECT_HAS_PIN (1<<31)
+#define __EXEC_OBJECT_HAS_FENCE (1<<30)
 
 static int
 need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -513,9 +343,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
 }
 
 static int
-pin_and_fence_object(struct drm_i915_gem_object *obj,
-                     struct intel_ring_buffer *ring)
+i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
+                                   struct intel_ring_buffer *ring)
 {
+        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
         bool need_fence, need_mappable;
@@ -527,15 +358,17 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
                 obj->tiling_mode != I915_TILING_NONE;
         need_mappable = need_fence || need_reloc_mappable(obj);
 
-        ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+        ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
         if (ret)
                 return ret;
 
+        entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
         if (has_fenced_gpu_access) {
                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
                         ret = i915_gem_object_get_fence(obj);
                         if (ret)
-                                goto err_unpin;
+                                return ret;
 
                         if (i915_gem_object_pin_fence(obj))
                                 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -544,12 +377,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
                 }
         }
 
+        /* Ensure ppgtt mapping exists if needed */
+        if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+                i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+                                       obj, obj->cache_level);
+
+                obj->has_aliasing_ppgtt_mapping = 1;
+        }
+
         entry->offset = obj->gtt_offset;
         return 0;
+}
 
-err_unpin:
-        i915_gem_object_unpin(obj);
-        return ret;
+static void
+i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
+{
+        struct drm_i915_gem_exec_object2 *entry;
+
+        if (!obj->gtt_space)
+                return;
+
+        entry = obj->exec_entry;
+
+        if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+                i915_gem_object_unpin_fence(obj);
+
+        if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+                i915_gem_object_unpin(obj);
+
+        entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static int
@@ -557,11 +413,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                             struct drm_file *file,
                             struct list_head *objects)
 {
-        drm_i915_private_t *dev_priv = ring->dev->dev_private;
         struct drm_i915_gem_object *obj;
-        int ret, retry;
-        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
         struct list_head ordered_objects;
+        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+        int retry;
 
         INIT_LIST_HEAD(&ordered_objects);
         while (!list_empty(objects)) {
@@ -586,6 +441,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 
                 obj->base.pending_read_domains = 0;
                 obj->base.pending_write_domain = 0;
+                obj->pending_fenced_gpu_access = false;
         }
         list_splice(&ordered_objects, objects);
 
@@ -598,12 +454,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
          * 2. Bind new objects.
          * 3. Decrement pin count.
          *
-         * This avoid unnecessary unbinding of later objects in order to makr
+         * This avoid unnecessary unbinding of later objects in order to make
          * room for the earlier objects *unless* we need to defragment.
          */
         retry = 0;
         do {
-                ret = 0;
+                int ret = 0;
 
                 /* Unbind any ill-fitting objects or pin. */
                 list_for_each_entry(obj, objects, exec_list) {
@@ -623,7 +479,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                             (need_mappable && !obj->map_and_fenceable))
                                 ret = i915_gem_object_unbind(obj);
                         else
-                                ret = pin_and_fence_object(obj, ring);
+                                ret = i915_gem_execbuffer_reserve_object(obj, ring);
                         if (ret)
                                 goto err;
                 }
@@ -633,77 +489,22 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                         if (obj->gtt_space)
                                 continue;
 
-                        ret = pin_and_fence_object(obj, ring);
-                        if (ret) {
-                                int ret_ignore;
-
-                                /* This can potentially raise a harmless
-                                 * -EINVAL if we failed to bind in the above
-                                 * call. It cannot raise -EINTR since we know
-                                 * that the bo is freshly bound and so will
-                                 * not need to be flushed or waited upon.
-                                 */
-                                ret_ignore = i915_gem_object_unbind(obj);
-                                (void)ret_ignore;
-                                WARN_ON(obj->gtt_space);
-                                break;
-                        }
+                        ret = i915_gem_execbuffer_reserve_object(obj, ring);
+                        if (ret)
+                                goto err;
                 }
 
-                /* Decrement pin count for bound objects */
-                list_for_each_entry(obj, objects, exec_list) {
-                        struct drm_i915_gem_exec_object2 *entry;
-
-                        if (!obj->gtt_space)
-                                continue;
-
-                        entry = obj->exec_entry;
-                        if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-                                i915_gem_object_unpin_fence(obj);
-                                entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-                        }
-
-                        i915_gem_object_unpin(obj);
-
-                        /* ... and ensure ppgtt mapping exist if needed. */
-                        if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-                                i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
-                                                       obj, obj->cache_level);
+err:            /* Decrement pin count for bound objects */
+                list_for_each_entry(obj, objects, exec_list)
+                        i915_gem_execbuffer_unreserve_object(obj);
 
-                                obj->has_aliasing_ppgtt_mapping = 1;
-                        }
-                }
-
-                if (ret != -ENOSPC || retry > 1)
+                if (ret != -ENOSPC || retry++)
                         return ret;
 
-                /* First attempt, just clear anything that is purgeable.
-                 * Second attempt, clear the entire GTT.
-                 */
-                ret = i915_gem_evict_everything(ring->dev, retry == 0);
+                ret = i915_gem_evict_everything(ring->dev);
                 if (ret)
                         return ret;
-
-                retry++;
         } while (1);
-
-err:
-        list_for_each_entry_continue_reverse(obj, objects, exec_list) {
-                struct drm_i915_gem_exec_object2 *entry;
-
-                if (!obj->gtt_space)
-                        continue;
-
-                entry = obj->exec_entry;
-                if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-                        i915_gem_object_unpin_fence(obj);
-                        entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
-                }
-
-                i915_gem_object_unpin(obj);
-        }
-
-        return ret;
 }
 
 static int
@@ -809,18 +610,6 @@ err:
         return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-                          uint32_t invalidate_domains,
-                          uint32_t flush_domains)
-{
-        if (flush_domains & I915_GEM_DOMAIN_CPU)
-                intel_gtt_chipset_flush();
-
-        if (flush_domains & I915_GEM_DOMAIN_GTT)
-                wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -853,48 +642,45 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
         return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                 struct list_head *objects)
 {
         struct drm_i915_gem_object *obj;
-        struct change_domains cd;
+        uint32_t flush_domains = 0;
+        uint32_t flips = 0;
         int ret;
 
-        memset(&cd, 0, sizeof(cd));
-        list_for_each_entry(obj, objects, exec_list)
-                i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-        if (cd.invalidate_domains | cd.flush_domains) {
-                i915_gem_execbuffer_flush(ring->dev,
-                                          cd.invalidate_domains,
-                                          cd.flush_domains);
-        }
-
-        if (cd.flips) {
-                ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+        list_for_each_entry(obj, objects, exec_list) {
+                ret = i915_gem_object_sync(obj, ring);
                 if (ret)
                         return ret;
+
+                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+                        i915_gem_clflush_object(obj);
+
+                if (obj->base.pending_write_domain)
+                        flips |= atomic_read(&obj->pending_flip);
+
+                flush_domains |= obj->base.write_domain;
         }
 
-        list_for_each_entry(obj, objects, exec_list) {
-                ret = i915_gem_object_sync(obj, ring);
+        if (flips) {
+                ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                 if (ret)
                         return ret;
         }
 
+        if (flush_domains & I915_GEM_DOMAIN_CPU)
+                intel_gtt_chipset_flush();
+
+        if (flush_domains & I915_GEM_DOMAIN_GTT)
+                wmb();
+
         /* Unconditionally invalidate gpu caches and ensure that we do flush
          * any residual writes from the previous batch.
          */
-        ret = i915_gem_flush_ring(ring,
-                                  I915_GEM_GPU_DOMAINS,
-                                  ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
-        if (ret)
-                return ret;
-
-        ring->gpu_caches_dirty = false;
-        return 0;
+        return intel_ring_invalidate_all_caches(ring);
 }
 
 static bool
@@ -942,9 +728,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
         struct drm_i915_gem_object *obj;
 
         list_for_each_entry(obj, objects, exec_list) {
                 u32 old_read = obj->base.read_domains;
                 u32 old_write = obj->base.write_domain;
-
 
                 obj->base.read_domains = obj->base.pending_read_domains;
                 obj->base.write_domain = obj->base.pending_write_domain;
@@ -953,17 +738,13 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
                 i915_gem_object_move_to_active(obj, ring, seqno);
                 if (obj->base.write_domain) {
                         obj->dirty = 1;
-                        obj->pending_gpu_write = true;
-                        list_move_tail(&obj->gpu_write_list,
-                                       &ring->gpu_write_list);
+                        obj->last_write_seqno = seqno;
                         if (obj->pin_count) /* check for potential scanout */
-                                intel_mark_busy(ring->dev, obj);
+                                intel_mark_fb_busy(obj);
                 }
 
                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
         }
-
-        intel_mark_busy(ring->dev, NULL);
 }
 
 static void
@@ -971,16 +752,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                                     struct drm_file *file,
                                     struct intel_ring_buffer *ring)
 {
-        struct drm_i915_gem_request *request;
-
         /* Unconditionally force add_request to emit a full flush. */
         ring->gpu_caches_dirty = true;
 
         /* Add a breadcrumb for the completion of the batch buffer */
-        request = kzalloc(sizeof(*request), GFP_KERNEL);
-        if (request == NULL || i915_add_request(ring, file, request)) {
-                kfree(request);
-        }
+        (void)i915_add_request(ring, file, NULL);
 }
 
 static int
@@ -1326,8 +1102,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                 return -ENOMEM;
         }
         ret = copy_from_user(exec_list,
-                             (struct drm_i915_relocation_entry __user *)
-                             (uintptr_t) args->buffers_ptr,
+                             (void __user *)(uintptr_t)args->buffers_ptr,
                              sizeof(*exec_list) * args->buffer_count);
         if (ret != 0) {
                 DRM_DEBUG("copy %d exec entries failed %d\n",
@@ -1366,8 +1141,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                 for (i = 0; i < args->buffer_count; i++)
                         exec_list[i].offset = exec2_list[i].offset;
                 /* ... and back out to userspace */
-                ret = copy_to_user((struct drm_i915_relocation_entry __user *)
-                                   (uintptr_t) args->buffers_ptr,
+                ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
                                    exec_list,
                                    sizeof(*exec_list) * args->buffer_count);
                 if (ret) {
@@ -1421,8 +1195,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
         if (!ret) {
                 /* Copy the new buffer offsets back to the user's exec list. */
-                ret = copy_to_user((struct drm_i915_relocation_entry __user *)
-                                   (uintptr_t) args->buffers_ptr,
+                ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
                                    exec2_list,
                                    sizeof(*exec2_list) * args->buffer_count);
                 if (ret) {