aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2018-12-28 09:07:35 -0500
committer Chris Wilson <chris@chris-wilson.co.uk> 2018-12-28 09:43:27 -0500
commit 6faf5916e6beb0dedb0fcbbafbaa152adeaea758 (patch)
tree 060167be32a7b4951e5ed05054d6e93ed877916d /drivers/gpu/drm/i915/intel_ringbuffer.c
parent 167bc759e8236f93d501253ec1a75431fedb5b3f (diff)
drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
The writing is on the wall for the existence of a single execution queue along each engine, and as a consequence we will not be able to track dependencies along the HW queue itself, i.e. we will not be able to use HW semaphores on gen7 as they use a global set of registers (and unlike gen8+ we cannot effectively target memory to keep per-context seqno and dependencies). On the positive side, when we implement request reordering for gen7 we also cannot presume a simple execution queue and would also require removing the current semaphore generation code. So this brings us another step closer to request reordering for ringbuffer submission! The negative side is that using interrupts to drive inter-engine synchronisation is much slower (4us -> 15us to do a nop on each of the 3 engines on ivb). This is much better than it was at the time of introducing the HW semaphores and, equally important, userspace has weaned itself off intermixing dependent BLT/RENDER operations (the prime culprit was glyph rendering in UXA). So while we regress the microbenchmarks, it should not impact the user. References: https://bugs.freedesktop.org/show_bug.cgi?id=108888 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.c | 138
1 files changed, 0 insertions, 138 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1102c2e98222..588294a3bbd2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
556 556
557 intel_engine_reset_breadcrumbs(engine); 557 intel_engine_reset_breadcrumbs(engine);
558 558
559 if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
560 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
561 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
562 if (HAS_VEBOX(dev_priv))
563 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
564 }
565
566 /* Enforce ordering by reading HEAD register back */ 559 /* Enforce ordering by reading HEAD register back */
567 I915_READ_HEAD(engine); 560 I915_READ_HEAD(engine);
568 561
@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
745 return 0; 738 return 0;
746} 739}
747 740
748static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
749{
750 struct drm_i915_private *dev_priv = rq->i915;
751 struct intel_engine_cs *engine;
752 enum intel_engine_id id;
753 int num_rings = 0;
754
755 for_each_engine(engine, dev_priv, id) {
756 i915_reg_t mbox_reg;
757
758 if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
759 continue;
760
761 mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
762 if (i915_mmio_reg_valid(mbox_reg)) {
763 *cs++ = MI_LOAD_REGISTER_IMM(1);
764 *cs++ = i915_mmio_reg_offset(mbox_reg);
765 *cs++ = rq->global_seqno;
766 num_rings++;
767 }
768 }
769 if (num_rings & 1)
770 *cs++ = MI_NOOP;
771
772 return cs;
773}
774
775static void cancel_requests(struct intel_engine_cs *engine) 741static void cancel_requests(struct intel_engine_cs *engine)
776{ 742{
777 struct i915_request *request; 743 struct i915_request *request;
@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
822 788
823static const int i9xx_emit_breadcrumb_sz = 4; 789static const int i9xx_emit_breadcrumb_sz = 4;
824 790
825static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
826{
827 return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
828}
829
830static int
831gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
832{
833 u32 dw1 = MI_SEMAPHORE_MBOX |
834 MI_SEMAPHORE_COMPARE |
835 MI_SEMAPHORE_REGISTER;
836 u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
837 u32 *cs;
838
839 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
840
841 cs = intel_ring_begin(rq, 4);
842 if (IS_ERR(cs))
843 return PTR_ERR(cs);
844
845 *cs++ = dw1 | wait_mbox;
846 /* Throughout all of the GEM code, seqno passed implies our current
847 * seqno is >= the last seqno executed. However for hardware the
848 * comparison is strictly greater than.
849 */
850 *cs++ = signal->global_seqno - 1;
851 *cs++ = 0;
852 *cs++ = MI_NOOP;
853 intel_ring_advance(rq, cs);
854
855 return 0;
856}
857
858static void 791static void
859gen5_seqno_barrier(struct intel_engine_cs *engine) 792gen5_seqno_barrier(struct intel_engine_cs *engine)
860{ 793{
@@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
2151 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); 2084 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
2152} 2085}
2153 2086
2154static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
2155 struct intel_engine_cs *engine)
2156{
2157 int i;
2158
2159 if (!HAS_LEGACY_SEMAPHORES(dev_priv))
2160 return;
2161
2162 GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
2163 engine->semaphore.sync_to = gen6_ring_sync_to;
2164 engine->semaphore.signal = gen6_signal;
2165
2166 /*
2167 * The current semaphore is only applied on pre-gen8
2168 * platform. And there is no VCS2 ring on the pre-gen8
2169 * platform. So the semaphore between RCS and VCS2 is
2170 * initialized as INVALID.
2171 */
2172 for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
2173 static const struct {
2174 u32 wait_mbox;
2175 i915_reg_t mbox_reg;
2176 } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
2177 [RCS_HW] = {
2178 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
2179 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
2180 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
2181 },
2182 [VCS_HW] = {
2183 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
2184 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
2185 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
2186 },
2187 [BCS_HW] = {
2188 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
2189 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
2190 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
2191 },
2192 [VECS_HW] = {
2193 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
2194 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
2195 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
2196 },
2197 };
2198 u32 wait_mbox;
2199 i915_reg_t mbox_reg;
2200
2201 if (i == engine->hw_id) {
2202 wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
2203 mbox_reg = GEN6_NOSYNC;
2204 } else {
2205 wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
2206 mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
2207 }
2208
2209 engine->semaphore.mbox.wait[i] = wait_mbox;
2210 engine->semaphore.mbox.signal[i] = mbox_reg;
2211 }
2212}
2213
2214static void intel_ring_init_irq(struct drm_i915_private *dev_priv, 2087static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2215 struct intel_engine_cs *engine) 2088 struct intel_engine_cs *engine)
2216{ 2089{
@@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2253 GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); 2126 GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
2254 2127
2255 intel_ring_init_irq(dev_priv, engine); 2128 intel_ring_init_irq(dev_priv, engine);
2256 intel_ring_init_semaphores(dev_priv, engine);
2257 2129
2258 engine->init_hw = init_ring_common; 2130 engine->init_hw = init_ring_common;
2259 engine->reset.prepare = reset_prepare; 2131 engine->reset.prepare = reset_prepare;
@@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2265 2137
2266 engine->emit_breadcrumb = i9xx_emit_breadcrumb; 2138 engine->emit_breadcrumb = i9xx_emit_breadcrumb;
2267 engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; 2139 engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
2268 if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
2269 int num_rings;
2270
2271 engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
2272
2273 num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
2274 engine->emit_breadcrumb_sz += num_rings * 3;
2275 if (num_rings & 1)
2276 engine->emit_breadcrumb_sz++;
2277 }
2278 2140
2279 engine->set_default_submission = i9xx_set_default_submission; 2141 engine->set_default_submission = i9xx_set_default_submission;
2280 2142