author     Daniel Vetter <daniel.vetter@ffwll.ch>    2012-07-21 06:25:01 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>    2012-07-25 12:23:54 -0400
commit     6ac42f4148bc27e5ffd18a9ab0eac57f58822af4
tree       7777f5cdc8eb372657c3435c3db5f089b91738c0 /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent     26b9c4a57fc3ff0ae6032548870bebfa5cd0de3d
drm/i915: Replace the complex flushing logic with simple invalidate/flush all
Now that we unconditionally flush and invalidate between every batch buffer, we no longer need the complex logic to decide which domains require flushing. Remove it and rejoice.

v2 (danvet): Keep around the flip waiting logic. It's gross and broken, I know, but we can't just kill that thing ... even if we just keep it around as a reminder that things are broken.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 222
1 file changed, 20 insertions(+), 202 deletions(-)
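
For quick reference, the simplified i915_gem_execbuffer_move_to_gpu() after this patch looks roughly as follows. This is a sketch reconstructed from the added and context lines of the hunks below, not a verbatim copy of the resulting file: indentation is approximated and the tail of the function, which this patch does not touch, is omitted.

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                struct list_head *objects)
{
        struct drm_i915_gem_object *obj;
        uint32_t flush_domains = 0;
        uint32_t flips = 0;
        int ret;

        /* Sync every object to this ring and note what still needs flushing. */
        list_for_each_entry(obj, objects, exec_list) {
                ret = i915_gem_object_sync(obj, ring);
                if (ret)
                        return ret;

                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        i915_gem_clflush_object(obj);

                if (obj->base.pending_write_domain)
                        flips |= atomic_read(&obj->pending_flip);

                flush_domains |= obj->base.write_domain;
        }

        /* The flip-wait logic is kept around, per the v2 note above. */
        if (flips) {
                ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                if (ret)
                        return ret;
        }

        if (flush_domains & I915_GEM_DOMAIN_CPU)
                intel_gtt_chipset_flush();

        if (flush_domains & I915_GEM_DOMAIN_GTT)
                wmb();

        /* Unconditionally invalidate gpu caches and ensure that we do flush
         * any residual writes from the previous batch.
         */
        /* ... remainder of the function is unchanged by this patch and omitted here ... */
}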
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 36c940c1a97..6c810798de9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,180 +34,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-        uint32_t invalidate_domains;
-        uint32_t flush_domains;
-        uint32_t flush_rings;
-        uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Mapped to GTT
- * 4. Read by GPU
- * 5. Unmapped from GTT
- * 6. Freed
- *
- *    Let's take these a step at a time
- *
- *    1. Allocated
- *        Pages allocated from the kernel may still have
- *        cache contents, so we set them to (CPU, CPU) always.
- *    2. Written by CPU (using pwrite)
- *        The pwrite function calls set_domain (CPU, CPU) and
- *        this function does nothing (as nothing changes)
- *    3. Mapped by GTT
- *        This function asserts that the object is not
- *        currently in any GPU-based read or write domains
- *    4. Read by GPU
- *        i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *        As write_domain is zero, this function adds in the
- *        current read domains (CPU+COMMAND, 0).
- *        flush_domains is set to CPU.
- *        invalidate_domains is set to COMMAND
- *        clflush is run to get data out of the CPU caches
- *        then i915_dev_set_domain calls i915_gem_flush to
- *        emit an MI_FLUSH and drm_agp_chipset_flush
- *    5. Unmapped from GTT
- *        i915_gem_object_unbind calls set_domain (CPU, CPU)
- *        flush_domains and invalidate_domains end up both zero
- *        so no flushing/invalidating happens
- *    6. Freed
- *        yay, done
- *
- * Case 2: The shared render buffer
- *
- * 1. Allocated
- * 2. Mapped to GTT
- * 3. Read/written by GPU
- * 4. set_domain to (CPU,CPU)
- * 5. Read/written by CPU
- * 6. Read/written by GPU
- *
- *    1. Allocated
- *        Same as last example, (CPU, CPU)
- *    2. Mapped to GTT
- *        Nothing changes (assertions find that it is not in the GPU)
- *    3. Read/written by GPU
- *        execbuffer calls set_domain (RENDER, RENDER)
- *        flush_domains gets CPU
- *        invalidate_domains gets GPU
- *        clflush (obj)
- *        MI_FLUSH and drm_agp_chipset_flush
- *    4. set_domain (CPU, CPU)
- *        flush_domains gets GPU
- *        invalidate_domains gets CPU
- *        wait_rendering (obj) to make sure all drawing is complete.
- *        This will include an MI_FLUSH to get the data from GPU
- *        to memory
- *        clflush (obj) to invalidate the CPU cache
- *        Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *    5. Read/written by CPU
- *        cache lines are loaded and dirtied
- *    6. Read written by GPU
- *        Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Read by GPU
- * 4. Updated (written) by CPU again
- * 5. Read by GPU
- *
- *    1. Allocated
- *        (CPU, CPU)
- *    2. Written by CPU
- *        (CPU, CPU)
- *    3. Read by GPU
- *        (CPU+RENDER, 0)
- *        flush_domains = CPU
- *        invalidate_domains = RENDER
- *        clflush (obj)
- *        MI_FLUSH
- *        drm_agp_chipset_flush
- *    4. Updated (written) by CPU again
- *        (CPU, CPU)
- *        flush_domains = 0 (no previous write domain)
- *        invalidate_domains = 0 (no new read domains)
- *    5. Read by GPU
- *        (CPU+RENDER, 0)
- *        flush_domains = CPU
- *        invalidate_domains = RENDER
- *        clflush (obj)
- *        MI_FLUSH
- *        drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-                                  struct intel_ring_buffer *ring,
-                                  struct change_domains *cd)
-{
-        uint32_t invalidate_domains = 0, flush_domains = 0;
-
-        /*
-         * If the object isn't moving to a new write domain,
-         * let the object stay in multiple read domains
-         */
-        if (obj->base.pending_write_domain == 0)
-                obj->base.pending_read_domains |= obj->base.read_domains;
-
-        /*
-         * Flush the current write domain if
-         * the new read domains don't match. Invalidate
-         * any read domains which differ from the old
-         * write domain
-         */
-        if (obj->base.write_domain &&
-            (((obj->base.write_domain != obj->base.pending_read_domains ||
-               obj->ring != ring)) ||
-             (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-                flush_domains |= obj->base.write_domain;
-                invalidate_domains |=
-                        obj->base.pending_read_domains & ~obj->base.write_domain;
-        }
-        /*
-         * Invalidate any read caches which may have
-         * stale data. That is, any new read domains.
-         */
-        invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-                i915_gem_clflush_object(obj);
-
-        if (obj->base.pending_write_domain)
-                cd->flips |= atomic_read(&obj->pending_flip);
-
-        /* The actual obj->write_domain will be updated with
-         * pending_write_domain after we emit the accumulated flush for all
-         * of our domain changes in execbuffers (which clears objects'
-         * write_domains). So if we have a current write domain that we
-         * aren't changing, set pending_write_domain to that.
-         */
-        if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-                obj->base.pending_write_domain = obj->base.write_domain;
-
-        cd->invalidate_domains |= invalidate_domains;
-        cd->flush_domains |= flush_domains;
-        if (flush_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(obj->ring);
-        if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
         int and;
         struct hlist_head buckets[0];
@@ -810,18 +636,6 @@ err:
         return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-                          uint32_t invalidate_domains,
-                          uint32_t flush_domains)
-{
-        if (flush_domains & I915_GEM_DOMAIN_CPU)
-                intel_gtt_chipset_flush();
-
-        if (flush_domains & I915_GEM_DOMAIN_GTT)
-                wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -854,37 +668,41 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
         return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                 struct list_head *objects)
 {
         struct drm_i915_gem_object *obj;
-        struct change_domains cd;
+        uint32_t flush_domains = 0;
+        uint32_t flips = 0;
         int ret;
 
-        memset(&cd, 0, sizeof(cd));
-        list_for_each_entry(obj, objects, exec_list)
-                i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-        if (cd.invalidate_domains | cd.flush_domains) {
-                i915_gem_execbuffer_flush(ring->dev,
-                                          cd.invalidate_domains,
-                                          cd.flush_domains);
-        }
-
-        if (cd.flips) {
-                ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+        list_for_each_entry(obj, objects, exec_list) {
+                ret = i915_gem_object_sync(obj, ring);
                 if (ret)
                         return ret;
+
+                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+                        i915_gem_clflush_object(obj);
+
+                if (obj->base.pending_write_domain)
+                        flips |= atomic_read(&obj->pending_flip);
+
+                flush_domains |= obj->base.write_domain;
         }
 
-        list_for_each_entry(obj, objects, exec_list) {
-                ret = i915_gem_object_sync(obj, ring);
+        if (flips) {
+                ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                 if (ret)
                         return ret;
         }
 
+        if (flush_domains & I915_GEM_DOMAIN_CPU)
+                intel_gtt_chipset_flush();
+
+        if (flush_domains & I915_GEM_DOMAIN_GTT)
+                wmb();
+
         /* Unconditionally invalidate gpu caches and ensure that we do flush
          * any residual writes from the previous batch.
          */