author	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-28 12:16:40 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-31 10:35:45 -0500
commit	835051d397b81e1534db0a0f378f9035caa0e77b (patch)
tree	645d2b27a5098928125a1d3029ce54065b4d8441 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	1212bd821de8fd7d63003265bb3fe5711ebbc3f7 (diff)
drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
The irq_seqno_barrier is a tradeoff between doing work on every request
(on the GPU) and doing work after every interrupt (on the CPU). We
presume we have many more requests than interrupts! However, for
Ironlake, the workaround is a pretty hideous usleep() and so even though
it was found we need to repeat the MI_STORE_DWORD_IMM 8 times, or about
1us of GPU time, doing so is preferable to requiring a sleep of
125-250us on the CPU where we desire to respond immediately (ideally
from within the interrupt handler)!

The additional MI_STORE_DWORD_IMM commands also have the side-effect of
flushing MI operations from userspace which are not caught by MI_FLUSH!

Testcase: igt/gem_sync
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228171641.16531-5-chris@chris-wilson.co.uk
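As a back-of-the-envelope check on the numbers above: the new breadcrumb
costs one MI_FLUSH dword, three dwords per store, and one
MI_USER_INTERRUPT dword, which is where gen5_emit_breadcrumb_sz =
GEN5_WA_STORES * 3 + 2 (= 26 for 8 stores) in the diff below comes from.
The following minimal userspace sketch of that dword accounting is
illustrative only: the opcode values are placeholders rather than the
real MI_* encodings, and emit_breadcrumb() is a hypothetical stand-in
for the kernel's gen5_emit_breadcrumb().

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN5_WA_STORES 8 /* must be at least 1! */

/* Placeholder opcodes -- the real encodings live in the i915 headers. */
enum { MI_FLUSH = 1, MI_STORE_DWORD_INDEX = 2, MI_USER_INTERRUPT = 3 };

/* Hypothetical stand-in for gen5_emit_breadcrumb(): pack the same
 * command sequence into a ring buffer and report the dwords used. */
static int emit_breadcrumb(uint32_t *cs, uint32_t hws_index, uint32_t seqno)
{
	uint32_t *start = cs;
	int i;

	*cs++ = MI_FLUSH;
	for (i = 0; i < GEN5_WA_STORES; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = hws_index;	/* where in the HWSP to write */
		*cs++ = seqno;		/* the value the CPU waits for */
	}
	*cs++ = MI_USER_INTERRUPT;

	return cs - start;
}

int main(void)
{
	uint32_t ring[32];
	int sz = emit_breadcrumb(ring, 0x30, 42);

	/* Must match the ring reservation: GEN5_WA_STORES * 3 + 2 == 26. */
	assert(sz == GEN5_WA_STORES * 3 + 2);
	printf("gen5 breadcrumb emits %d dwords\n", sz);
	return 0;
}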
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	40
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index dd996103d495..13ac01b67ead 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -881,26 +881,29 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-
 static const int i9xx_emit_breadcrumb_sz = 6;
 
-static void
-gen5_seqno_barrier(struct intel_engine_cs *engine)
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	/* MI_STORE are internally buffered by the GPU and not flushed
-	 * either by MI_FLUSH or SyncFlush or any other combination of
-	 * MI commands.
-	 *
-	 * "Only the submission of the store operation is guaranteed.
-	 * The write result will be complete (coherent) some time later
-	 * (this is practically a finite period but there is no guaranteed
-	 * latency)."
-	 *
-	 * Empirically, we observe that we need a delay of at least 75us to
-	 * be sure that the seqno write is visible by the CPU.
-	 */
-	usleep_range(125, 250);
+	int i;
+
+	*cs++ = MI_FLUSH;
+
+	BUILD_BUG_ON(GEN5_WA_STORES < 1);
+	for (i = 0; i < GEN5_WA_STORES; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX_ADDR;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
+#undef GEN5_WA_STORES
 
 static void
 gen5_irq_enable(struct intel_engine_cs *engine)
@@ -2148,7 +2151,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		engine->irq_enable = gen5_irq_enable;
 		engine->irq_disable = gen5_irq_disable;
-		engine->irq_seqno_barrier = gen5_seqno_barrier;
 	} else if (INTEL_GEN(dev_priv) >= 3) {
 		engine->irq_enable = i9xx_irq_enable;
 		engine->irq_disable = i9xx_irq_disable;
@@ -2191,6 +2193,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+	if (IS_GEN(dev_priv, 5)) {
+		engine->emit_breadcrumb = gen5_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
+	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 