aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2015-12-11 06:32:59 -0500
committer Jani Nikula <jani.nikula@intel.com> 2015-12-22 05:56:58 -0500
commit 0f0cd472062eca6f9fac8be0cd5585f9a2df1ab2 (patch)
tree 022d595297dfdb0805dc25ac32cf4ac42ca7bb80 /drivers/gpu
parent f87a780f07b22b6dc4642dbaf44af65112076cb8 (diff)
drm/i915: Only spin whilst waiting on the current request
Limit busywaiting only to the request currently being processed by the GPU. If the request is not currently being processed by the GPU, there is a very low likelihood of it being completed within the 2 microsecond spin timeout and so we will just be wasting CPU cycles.

v2: Check for logical inversion when rebasing - we were incorrectly checking for this request being active, and instead busywaiting for when the GPU was not yet processing the request of interest.

v3: Try another colour for the seqno names.

v4: Another colour for the function names.

v5: Remove the forced coherency when checking for the active request. On reflection and plenty of recent experimentation, the issue is not a cache coherency problem - but an irq/seqno ordering problem (timing issue). Here, we do not need the w/a to force ordering of the read with an interrupt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: "Rantala, Valtteri" <valtteri.rantala@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1449833608-22125-4-git-send-email-chris@chris-wilson.co.uk
(cherry picked from commit 821485dc2ad665f136c57ee589bf7a8210160fe2)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 27
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 8
2 files changed, 26 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 037a650d6565..f4af19a0d569 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2193,8 +2193,17 @@ struct drm_i915_gem_request {
2193 struct drm_i915_private *i915; 2193 struct drm_i915_private *i915;
2194 struct intel_engine_cs *ring; 2194 struct intel_engine_cs *ring;
2195 2195
2196 /** GEM sequence number associated with this request. */ 2196 /** GEM sequence number associated with the previous request,
2197 uint32_t seqno; 2197 * when the HWS breadcrumb is equal to this the GPU is processing
2198 * this request.
2199 */
2200 u32 previous_seqno;
2201
2202 /** GEM sequence number associated with this request,
2203 * when the HWS breadcrumb is equal or greater than this the GPU
2204 * has finished processing this request.
2205 */
2206 u32 seqno;
2198 2207
2199 /** Position in the ringbuffer of the start of the request */ 2208 /** Position in the ringbuffer of the start of the request */
2200 u32 head; 2209 u32 head;
@@ -2911,15 +2920,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
2911 return (int32_t)(seq1 - seq2) >= 0; 2920 return (int32_t)(seq1 - seq2) >= 0;
2912} 2921}
2913 2922
2923static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
2924 bool lazy_coherency)
2925{
2926 u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2927 return i915_seqno_passed(seqno, req->previous_seqno);
2928}
2929
2914static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, 2930static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
2915 bool lazy_coherency) 2931 bool lazy_coherency)
2916{ 2932{
2917 u32 seqno; 2933 u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2918
2919 BUG_ON(req == NULL);
2920
2921 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
2922
2923 return i915_seqno_passed(seqno, req->seqno); 2934 return i915_seqno_passed(seqno, req->seqno);
2924} 2935}
2925 2936
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8719fa2ae7e7..f56af0aaafde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1193,9 +1193,13 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1193 * takes to sleep on a request, on the order of a microsecond. 1193 * takes to sleep on a request, on the order of a microsecond.
1194 */ 1194 */
1195 1195
1196 if (i915_gem_request_get_ring(req)->irq_refcount) 1196 if (req->ring->irq_refcount)
1197 return -EBUSY; 1197 return -EBUSY;
1198 1198
1199 /* Only spin if we know the GPU is processing this request */
1200 if (!i915_gem_request_started(req, true))
1201 return -EAGAIN;
1202
1199 timeout = local_clock_us(&cpu) + 5; 1203 timeout = local_clock_us(&cpu) + 5;
1200 while (!need_resched()) { 1204 while (!need_resched()) {
1201 if (i915_gem_request_completed(req, true)) 1205 if (i915_gem_request_completed(req, true))
@@ -1209,6 +1213,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1209 1213
1210 cpu_relax_lowlatency(); 1214 cpu_relax_lowlatency();
1211 } 1215 }
1216
1212 if (i915_gem_request_completed(req, false)) 1217 if (i915_gem_request_completed(req, false))
1213 return 0; 1218 return 0;
1214 1219
@@ -2600,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
2600 request->batch_obj = obj; 2605 request->batch_obj = obj;
2601 2606
2602 request->emitted_jiffies = jiffies; 2607 request->emitted_jiffies = jiffies;
2608 request->previous_seqno = ring->last_submitted_seqno;
2603 ring->last_submitted_seqno = request->seqno; 2609 ring->last_submitted_seqno = request->seqno;
2604 list_add_tail(&request->list, &ring->request_list); 2610 list_add_tail(&request->list, &ring->request_list);
2605 2611