author     Daniel Vetter <daniel.vetter@ffwll.ch>    2012-07-21 06:25:01 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>    2012-07-25 12:23:54 -0400
commit     6ac42f4148bc27e5ffd18a9ab0eac57f58822af4
tree       7777f5cdc8eb372657c3435c3db5f089b91738c0 /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parent     26b9c4a57fc3ff0ae6032548870bebfa5cd0de3d
drm/i915: Replace the complex flushing logic with simple invalidate/flush all
Now that we unconditionally flush and invalidate between every batch buffer, we no longer need the complex logic to decide which domains require flushing. Remove it and rejoice.

v2 (danvet): Keep around the flip waiting logic. It's gross and broken, I know, but we can't just kill that thing ... even if we just keep it around as a reminder that things are broken.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 222
1 file changed, 20 insertions(+), 202 deletions(-)
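
For quick reference, the simplified i915_gem_execbuffer_move_to_gpu() after this patch looks roughly as follows. This is a sketch reconstructed from the added and context lines of the hunks below, not a verbatim copy of the resulting file: indentation is approximated and the tail of the function, which this patch does not touch, is omitted.

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                struct list_head *objects)
{
        struct drm_i915_gem_object *obj;
        uint32_t flush_domains = 0;
        uint32_t flips = 0;
        int ret;

        /* Sync every object to this ring and note what still needs flushing. */
        list_for_each_entry(obj, objects, exec_list) {
                ret = i915_gem_object_sync(obj, ring);
                if (ret)
                        return ret;

                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        i915_gem_clflush_object(obj);

                if (obj->base.pending_write_domain)
                        flips |= atomic_read(&obj->pending_flip);

                flush_domains |= obj->base.write_domain;
        }

        /* The flip-wait logic is kept around, per the v2 note above. */
        if (flips) {
                ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                if (ret)
                        return ret;
        }

        if (flush_domains & I915_GEM_DOMAIN_CPU)
                intel_gtt_chipset_flush();

        if (flush_domains & I915_GEM_DOMAIN_GTT)
                wmb();

        /* Unconditionally invalidate gpu caches and ensure that we do flush
         * any residual writes from the previous batch.
         */
        /* ... remainder of the function is unchanged by this patch and omitted here ... */
}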
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 36c940c1a97..6c810798de9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,180 +34,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-        uint32_t invalidate_domains;
-        uint32_t flush_domains;
-        uint32_t flush_rings;
-        uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Mapped to GTT
- * 4. Read by GPU
- * 5. Unmapped from GTT
- * 6. Freed
- *
- *    Let's take these a step at a time
- *
- *    1. Allocated
- *        Pages allocated from the kernel may still have
- *        cache contents, so we set them to (CPU, CPU) always.
- *    2. Written by CPU (using pwrite)
- *        The pwrite function calls set_domain (CPU, CPU) and
- *        this function does nothing (as nothing changes)
- *    3. Mapped by GTT
- *        This function asserts that the object is not
- *        currently in any GPU-based read or write domains
- *    4. Read by GPU
- *        i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *        As write_domain is zero, this function adds in the
- *        current read domains (CPU+COMMAND, 0).
- *        flush_domains is set to CPU.
- *        invalidate_domains is set to COMMAND
- *        clflush is run to get data out of the CPU caches
- *        then i915_dev_set_domain calls i915_gem_flush to
- *        emit an MI_FLUSH and drm_agp_chipset_flush
- *    5. Unmapped from GTT
- *        i915_gem_object_unbind calls set_domain (CPU, CPU)
- *        flush_domains and invalidate_domains end up both zero
- *        so no flushing/invalidating happens
- *    6. Freed
- *        yay, done
- *
- * Case 2: The shared render buffer
- *
- * 1. Allocated
- * 2. Mapped to GTT
- * 3. Read/written by GPU
- * 4. set_domain to (CPU,CPU)
- * 5. Read/written by CPU
- * 6. Read/written by GPU
- *
- *    1. Allocated
- *        Same as last example, (CPU, CPU)
- *    2. Mapped to GTT
- *        Nothing changes (assertions find that it is not in the GPU)
- *    3. Read/written by GPU
- *        execbuffer calls set_domain (RENDER, RENDER)
- *        flush_domains gets CPU
- *        invalidate_domains gets GPU
- *        clflush (obj)
- *        MI_FLUSH and drm_agp_chipset_flush
- *    4. set_domain (CPU, CPU)
- *        flush_domains gets GPU
- *        invalidate_domains gets CPU
- *        wait_rendering (obj) to make sure all drawing is complete.
- *        This will include an MI_FLUSH to get the data from GPU
- *        to memory
- *        clflush (obj) to invalidate the CPU cache
- *        Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *    5. Read/written by CPU
- *        cache lines are loaded and dirtied
- *    6. Read written by GPU
- *        Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Read by GPU
- * 4. Updated (written) by CPU again
- * 5. Read by GPU
- *
- *    1. Allocated
- *        (CPU, CPU)
- *    2. Written by CPU
- *        (CPU, CPU)
- *    3. Read by GPU
- *        (CPU+RENDER, 0)
- *        flush_domains = CPU
- *        invalidate_domains = RENDER
- *        clflush (obj)
- *        MI_FLUSH
- *        drm_agp_chipset_flush
- *    4. Updated (written) by CPU again
- *        (CPU, CPU)
- *        flush_domains = 0 (no previous write domain)
- *        invalidate_domains = 0 (no new read domains)
- *    5. Read by GPU
- *        (CPU+RENDER, 0)
- *        flush_domains = CPU
- *        invalidate_domains = RENDER
- *        clflush (obj)
- *        MI_FLUSH
- *        drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-                                  struct intel_ring_buffer *ring,
-                                  struct change_domains *cd)
-{
-        uint32_t invalidate_domains = 0, flush_domains = 0;
-
-        /*
-         * If the object isn't moving to a new write domain,
-         * let the object stay in multiple read domains
-         */
-        if (obj->base.pending_write_domain == 0)
-                obj->base.pending_read_domains |= obj->base.read_domains;
-
-        /*
-         * Flush the current write domain if
-         * the new read domains don't match. Invalidate
-         * any read domains which differ from the old
-         * write domain
-         */
-        if (obj->base.write_domain &&
-            (((obj->base.write_domain != obj->base.pending_read_domains ||
-               obj->ring != ring)) ||
-             (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-                flush_domains |= obj->base.write_domain;
-                invalidate_domains |=
-                        obj->base.pending_read_domains & ~obj->base.write_domain;
-        }
-        /*
-         * Invalidate any read caches which may have
-         * stale data. That is, any new read domains.
-         */
-        invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-                i915_gem_clflush_object(obj);
-
-        if (obj->base.pending_write_domain)
-                cd->flips |= atomic_read(&obj->pending_flip);
-
-        /* The actual obj->write_domain will be updated with
-         * pending_write_domain after we emit the accumulated flush for all
-         * of our domain changes in execbuffers (which clears objects'
-         * write_domains). So if we have a current write domain that we
-         * aren't changing, set pending_write_domain to that.
-         */
-        if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-                obj->base.pending_write_domain = obj->base.write_domain;
-
-        cd->invalidate_domains |= invalidate_domains;
-        cd->flush_domains |= flush_domains;
-        if (flush_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(obj->ring);
-        if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-                cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
         int and;
         struct hlist_head buckets[0];
@@ -810,18 +636,6 @@ err:
         return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-                          uint32_t invalidate_domains,
-                          uint32_t flush_domains)
-{
-        if (flush_domains & I915_GEM_DOMAIN_CPU)
-                intel_gtt_chipset_flush();
-
-        if (flush_domains & I915_GEM_DOMAIN_GTT)
-                wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -854,37 +668,41 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
         return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                 struct list_head *objects)
 {
         struct drm_i915_gem_object *obj;
-        struct change_domains cd;
+        uint32_t flush_domains = 0;
+        uint32_t flips = 0;
         int ret;
 
-        memset(&cd, 0, sizeof(cd));
-        list_for_each_entry(obj, objects, exec_list)
-                i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-        if (cd.invalidate_domains | cd.flush_domains) {
-                i915_gem_execbuffer_flush(ring->dev,
-                                          cd.invalidate_domains,
-                                          cd.flush_domains);
-        }
-
-        if (cd.flips) {
-                ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+        list_for_each_entry(obj, objects, exec_list) {
+                ret = i915_gem_object_sync(obj, ring);
                 if (ret)
                         return ret;
+
+                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+                        i915_gem_clflush_object(obj);
+
+                if (obj->base.pending_write_domain)
+                        flips |= atomic_read(&obj->pending_flip);
+
+                flush_domains |= obj->base.write_domain;
         }
 
-        list_for_each_entry(obj, objects, exec_list) {
-                ret = i915_gem_object_sync(obj, ring);
+        if (flips) {
+                ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
                 if (ret)
                         return ret;
         }
 
+        if (flush_domains & I915_GEM_DOMAIN_CPU)
+                intel_gtt_chipset_flush();
+
+        if (flush_domains & I915_GEM_DOMAIN_GTT)
+                wmb();
+
         /* Unconditionally invalidate gpu caches and ensure that we do flush
          * any residual writes from the previous batch.
          */