about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2015-12-11 06:32:59 -0500
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-12-18 11:11:56 -0500
commit: 821485dc2ad665f136c57ee589bf7a8210160fe2 (patch)
tree: 6733fb30ba2abb3ac897d6e53469e7f660f7a16a
parent: ca5b721e238226af1d767103ac852aeb8e4c0764 (diff)
drm/i915: Only spin whilst waiting on the current request
Limit busywaiting only to the request currently being processed by the GPU. If the request is not currently being processed by the GPU, there is a very low likelihood of it being completed within the 2 microsecond spin timeout and so we will just be wasting CPU cycles.

v2: Check for logical inversion when rebasing - we were incorrectly checking for this request being active, and instead busywaiting for when the GPU was not yet processing the request of interest.

v3: Try another colour for the seqno names.

v4: Another colour for the function names.

v5: Remove the forced coherency when checking for the active request. On reflection and plenty of recent experimentation, the issue is not a cache coherency problem - but an irq/seqno ordering problem (timing issue). Here, we do not need the w/a to force ordering of the read with an interrupt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: "Rantala, Valtteri" <valtteri.rantala@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1449833608-22125-4-git-send-email-chris@chris-wilson.co.uk
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 27
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 8
2 files changed, 26 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d28d90ed901..10ec146d37a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2189,8 +2189,17 @@ struct drm_i915_gem_request {
2189 struct drm_i915_private *i915; 2189 struct drm_i915_private *i915;
2190 struct intel_engine_cs *ring; 2190 struct intel_engine_cs *ring;
2191 2191
2192 /** GEM sequence number associated with this request. */ 2192 /** GEM sequence number associated with the previous request,
2193 uint32_t seqno; 2193 * when the HWS breadcrumb is equal to this the GPU is processing
2194 * this request.
2195 */
2196 u32 previous_seqno;
2197
2198 /** GEM sequence number associated with this request,
2199 * when the HWS breadcrumb is equal or greater than this the GPU
2200 * has finished processing this request.
2201 */
2202 u32 seqno;
2194 2203
2195 /** Position in the ringbuffer of the start of the request */ 2204 /** Position in the ringbuffer of the start of the request */
2196 u32 head; 2205 u32 head;
@@ -2969,15 +2978,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
2969 return (int32_t)(seq1 - seq2) >= 0; 2978 return (int32_t)(seq1 - seq2) >= 0;
2970} 2979}
2971 2980
2981static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
2982 bool lazy_coherency)
2983{
2984 u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2985 return i915_seqno_passed(seqno, req->previous_seqno);
2986}
2987
2972static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, 2988static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
2973 bool lazy_coherency) 2989 bool lazy_coherency)
2974{ 2990{
2975 u32 seqno; 2991 u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2976
2977 BUG_ON(req == NULL);
2978
2979 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2980
2981 return i915_seqno_passed(seqno, req->seqno); 2992 return i915_seqno_passed(seqno, req->seqno);
2982} 2993}
2983 2994
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e68265b6fb46..d8efc9dfbc48 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1193,9 +1193,13 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1193 * takes to sleep on a request, on the order of a microsecond. 1193 * takes to sleep on a request, on the order of a microsecond.
1194 */ 1194 */
1195 1195
1196 if (i915_gem_request_get_ring(req)->irq_refcount) 1196 if (req->ring->irq_refcount)
1197 return -EBUSY; 1197 return -EBUSY;
1198 1198
1199 /* Only spin if we know the GPU is processing this request */
1200 if (!i915_gem_request_started(req, true))
1201 return -EAGAIN;
1202
1199 timeout = local_clock_us(&cpu) + 5; 1203 timeout = local_clock_us(&cpu) + 5;
1200 while (!need_resched()) { 1204 while (!need_resched()) {
1201 if (i915_gem_request_completed(req, true)) 1205 if (i915_gem_request_completed(req, true))
@@ -1209,6 +1213,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1209 1213
1210 cpu_relax_lowlatency(); 1214 cpu_relax_lowlatency();
1211 } 1215 }
1216
1212 if (i915_gem_request_completed(req, false)) 1217 if (i915_gem_request_completed(req, false))
1213 return 0; 1218 return 0;
1214 1219
@@ -2600,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
2600 request->batch_obj = obj; 2605 request->batch_obj = obj;
2601 2606
2602 request->emitted_jiffies = jiffies; 2607 request->emitted_jiffies = jiffies;
2608 request->previous_seqno = ring->last_submitted_seqno;
2603 ring->last_submitted_seqno = request->seqno; 2609 ring->last_submitted_seqno = request->seqno;
2604 list_add_tail(&request->list, &ring->request_list); 2610 list_add_tail(&request->list, &ring->request_list);
2605 2611