path: root/drivers/gpu
author     Chris Wilson <chris@chris-wilson.co.uk>    2018-12-28 09:07:35 -0500
committer  Chris Wilson <chris@chris-wilson.co.uk>    2018-12-28 09:43:27 -0500
commit     6faf5916e6beb0dedb0fcbbafbaa152adeaea758 (patch)
tree       060167be32a7b4951e5ed05054d6e93ed877916d /drivers/gpu
parent     167bc759e8236f93d501253ec1a75431fedb5b3f (diff)
drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
The writing is on the wall for the existence of a single execution queue
along each engine, and as a consequence we will not be able to track
dependencies along the HW queue itself, i.e. we will not be able to use
HW semaphores on gen7 as they use a global set of registers (and unlike
gen8+ we can not effectively target memory to keep per-context seqno and
dependencies).

On the positive side, when we implement request reordering for gen7 we
also can not presume a simple execution queue and would have to remove
the current semaphore generation code anyway. So this brings us another
step closer to request reordering for ringbuffer submission!

The negative side is that using interrupts to drive inter-engine
synchronisation is much slower (4us -> 15us to do a nop on each of the 3
engines on ivb). This is still much better than it was at the time of
introducing the HW semaphores and, equally important, userspace has
weaned itself off intermixing dependent BLT/RENDER operations (the prime
culprit was glyph rendering in UXA). So while we regress the
microbenchmarks, it should not impact the user.

References: https://bugs.freedesktop.org/show_bug.cgi?id=108888
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk
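For reference, the mechanism that replaces the semaphores is already visible in
the i915_request.c hunk below: same-engine dependencies keep their ordering on
the submit fence, while cross-engine dependencies now always fall back to
awaiting the signaller's dma-fence, which is only signalled from the completion
interrupt. A condensed sketch of that post-patch await path follows (error
handling and the scheduler hook elided; comments added here for illustration,
they are not part of the patch):

        if (to->engine == from->engine) {
                /* Same engine: order the two requests on their submit fences. */
                ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
                                                       &from->submit,
                                                       I915_FENCE_GFP);
        } else {
                /*
                 * Different engines: block submission of 'to' until 'from'
                 * signals its dma-fence from the breadcrumb interrupt,
                 * instead of emitting a MI_SEMAPHORE_MBOX register poll.
                 */
                ret = i915_sw_fence_await_dma_fence(&to->submit,
                                                    &from->fence, 0,
                                                    I915_FENCE_GFP);
        }
        return ret < 0 ? ret : 0;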
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c        19
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c             2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h             3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             4
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c       126
-rw-r--r--  drivers/gpu/drm/i915/i915_timeline.h        8
-rw-r--r--  drivers/gpu/drm/i915/i915_trace.h          29
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c     29
-rw-r--r--  drivers/gpu/drm/i915/intel_hangcheck.c    155
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c   138
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h    56
11 files changed, 12 insertions(+), 557 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2d29ce630c0e..1269d734ade0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_set(void *data, u64 val)
 {
-        struct drm_i915_private *dev_priv = data;
-        struct drm_device *dev = &dev_priv->drm;
-        int ret;
-
-        ret = mutex_lock_interruptible(&dev->struct_mutex);
-        if (ret)
-                return ret;
-
-        intel_runtime_pm_get(dev_priv);
-        ret = i915_gem_set_global_seqno(dev, val);
-        intel_runtime_pm_put(dev_priv);
-
-        mutex_unlock(&dev->struct_mutex);
-
-        return ret;
+        return val ? 0 : -EINVAL;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
@@ -4101,9 +4087,6 @@ i915_drop_caches_set(void *data, u64 val)
                                   I915_WAIT_LOCKED,
                                   MAX_SCHEDULE_TIMEOUT);
 
-        if (ret == 0 && val & DROP_RESET_SEQNO)
-                ret = i915_gem_set_global_seqno(&i915->drm, 1);
-
         if (val & DROP_RETIRE)
                 i915_retire_requests(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index caa055ac9472..dcb935338c63 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
                 value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
                 break;
         case I915_PARAM_HAS_SEMAPHORES:
-                value = HAS_LEGACY_SEMAPHORES(dev_priv);
+                value = 0;
                 break;
         case I915_PARAM_HAS_SECURE_BATCHES:
                 value = capable(CAP_SYS_ADMIN);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 936ec09c9490..287f06b9e95a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1948,7 +1948,6 @@ struct drm_i915_private {
                 struct list_head active_rings;
                 struct list_head closed_vma;
                 u32 active_requests;
-                u32 request_serial;
 
                 /**
                  * Is the GPU currently considered idle, or busy executing
@@ -2396,8 +2395,6 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_BLT(dev_priv)       HAS_ENGINE(dev_priv, BCS)
 #define HAS_VEBOX(dev_priv)     HAS_ENGINE(dev_priv, VECS)
 
-#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
-
 #define HAS_LLC(dev_priv)       ((dev_priv)->info.has_llc)
 #define HAS_SNOOP(dev_priv)     ((dev_priv)->info.has_snoop)
 #define HAS_EDRAM(dev_priv)     (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 39ee67e01bb7..9e65c37daa9c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
 
         spin_lock_irqsave(&request->engine->timeline.lock, flags);
         __i915_request_submit(request);
-        intel_engine_init_global_seqno(request->engine, request->global_seqno);
+        intel_engine_write_global_seqno(request->engine, request->global_seqno);
         spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
@@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 
         /*
          * Make sure no request can slip through without getting completed by
-         * either this call here to intel_engine_init_global_seqno, or the one
+         * either this call here to intel_engine_write_global_seqno, or the one
          * in nop_submit_request.
          */
         synchronize_rcu();
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8ab8e8e6a086..2cd1f51963f3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
         spin_unlock(&file_priv->mm.lock);
 }
 
-static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
+static void reserve_gt(struct drm_i915_private *i915)
 {
-        struct intel_engine_cs *engine;
-        struct i915_timeline *timeline;
-        enum intel_engine_id id;
-        int ret;
-
-        /* Carefully retire all requests without writing to the rings */
-        ret = i915_gem_wait_for_idle(i915,
-                                     I915_WAIT_INTERRUPTIBLE |
-                                     I915_WAIT_LOCKED,
-                                     MAX_SCHEDULE_TIMEOUT);
-        if (ret)
-                return ret;
-
-        GEM_BUG_ON(i915->gt.active_requests);
-
-        /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-        for_each_engine(engine, i915, id) {
-                GEM_TRACE("%s seqno %d (current %d) -> %d\n",
-                          engine->name,
-                          engine->timeline.seqno,
-                          intel_engine_get_seqno(engine),
-                          seqno);
-
-                if (seqno == engine->timeline.seqno)
-                        continue;
-
-                kthread_park(engine->breadcrumbs.signaler);
-
-                if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
-                        /* Flush any waiters before we reuse the seqno */
-                        intel_engine_disarm_breadcrumbs(engine);
-                        intel_engine_init_hangcheck(engine);
-                        GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
-                }
-
-                /* Check we are idle before we fiddle with hw state! */
-                GEM_BUG_ON(!intel_engine_is_idle(engine));
-                GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
-
-                /* Finally reset hw state */
-                intel_engine_init_global_seqno(engine, seqno);
-                engine->timeline.seqno = seqno;
-
-                kthread_unpark(engine->breadcrumbs.signaler);
-        }
-
-        list_for_each_entry(timeline, &i915->gt.timelines, link)
-                memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
-
-        i915->gt.request_serial = seqno;
-
-        return 0;
-}
-
-int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
-{
-        struct drm_i915_private *i915 = to_i915(dev);
-
-        lockdep_assert_held(&i915->drm.struct_mutex);
-
-        if (seqno == 0)
-                return -EINVAL;
-
-        /* HWS page needs to be set less than what we will inject to ring */
-        return reset_all_global_seqno(i915, seqno - 1);
-}
-
-static int reserve_gt(struct drm_i915_private *i915)
-{
-        int ret;
-
-        /*
-         * Reservation is fine until we may need to wrap around
-         *
-         * By incrementing the serial for every request, we know that no
-         * individual engine may exceed that serial (as each is reset to 0
-         * on any wrap). This protects even the most pessimistic of migrations
-         * of every request from all engines onto just one.
-         */
-        while (unlikely(++i915->gt.request_serial == 0)) {
-                ret = reset_all_global_seqno(i915, 0);
-                if (ret) {
-                        i915->gt.request_serial--;
-                        return ret;
-                }
-        }
-
         if (!i915->gt.active_requests++)
                 i915_gem_unpark(i915);
-
-        return 0;
 }
 
 static void unreserve_gt(struct drm_i915_private *i915)
@@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
         if (IS_ERR(ce))
                 return ERR_CAST(ce);
 
-        ret = reserve_gt(i915);
-        if (ret)
-                goto err_unpin;
+        reserve_gt(i915);
 
         ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
         if (ret)
@@ -743,7 +652,6 @@ err_unwind:
         kmem_cache_free(i915->requests, rq);
 err_unreserve:
         unreserve_gt(i915);
-err_unpin:
         intel_context_unpin(ce);
         return ERR_PTR(ret);
 }
@@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
                 ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
                                                        &from->submit,
                                                        I915_FENCE_GFP);
-                return ret < 0 ? ret : 0;
-        }
-
-        if (to->engine->semaphore.sync_to) {
-                u32 seqno;
-
-                GEM_BUG_ON(!from->engine->semaphore.signal);
-
-                seqno = i915_request_global_seqno(from);
-                if (!seqno)
-                        goto await_dma_fence;
-
-                if (seqno <= to->timeline->global_sync[from->engine->id])
-                        return 0;
-
-                trace_i915_gem_ring_sync_to(to, from);
-                ret = to->engine->semaphore.sync_to(to, from);
-                if (ret)
-                        return ret;
-
-                to->timeline->global_sync[from->engine->id] = seqno;
-                return 0;
+        } else {
+                ret = i915_sw_fence_await_dma_fence(&to->submit,
+                                                    &from->fence, 0,
+                                                    I915_FENCE_GFP);
         }
 
-await_dma_fence:
-        ret = i915_sw_fence_await_dma_fence(&to->submit,
-                                            &from->fence, 0,
-                                            I915_FENCE_GFP);
         return ret < 0 ? ret : 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index ebd71b487220..38c1e15e927a 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -63,14 +63,6 @@ struct i915_timeline {
          * redundant and we can discard it without loss of generality.
          */
         struct i915_syncmap *sync;
-        /**
-         * Separately to the inter-context seqno map above, we track the last
-         * barrier (e.g. semaphore wait) to the global engine timelines. Note
-         * that this tracks global_seqno rather than the context.seqno, and
-         * so it is subject to the limitations of hw wraparound and that we
-         * may need to revoke global_seqno (on pre-emption).
-         */
-        u32 global_sync[I915_NUM_ENGINES];
 
         struct list_head link;
         const char *name;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index b50c6b829715..5cf378936b05 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -585,35 +585,6 @@ TRACE_EVENT(i915_gem_evict_vm,
             TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_ring_sync_to,
-            TP_PROTO(struct i915_request *to, struct i915_request *from),
-            TP_ARGS(to, from),
-
-            TP_STRUCT__entry(
-                             __field(u32, dev)
-                             __field(u32, from_class)
-                             __field(u32, from_instance)
-                             __field(u32, to_class)
-                             __field(u32, to_instance)
-                             __field(u32, seqno)
-                             ),
-
-            TP_fast_assign(
-                           __entry->dev = from->i915->drm.primary->index;
-                           __entry->from_class = from->engine->uabi_class;
-                           __entry->from_instance = from->engine->instance;
-                           __entry->to_class = to->engine->uabi_class;
-                           __entry->to_instance = to->engine->instance;
-                           __entry->seqno = from->global_seqno;
-                           ),
-
-            TP_printk("dev=%u, sync-from=%u:%u, sync-to=%u:%u, seqno=%u",
-                      __entry->dev,
-                      __entry->from_class, __entry->from_instance,
-                      __entry->to_class, __entry->to_instance,
-                      __entry->seqno)
-);
-
 TRACE_EVENT(i915_request_queue,
             TP_PROTO(struct i915_request *rq, u32 flags),
             TP_ARGS(rq, flags),
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 561b474cbab1..78fc777c4bf4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -454,25 +454,8 @@ cleanup:
         return err;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
-        struct drm_i915_private *dev_priv = engine->i915;
-
-        /* Our semaphore implementation is strictly monotonic (i.e. we proceed
-         * so long as the semaphore value in the register/page is greater
-         * than the sync value), so whenever we reset the seqno,
-         * so long as we reset the tracking semaphore value to 0, it will
-         * always be before the next request's seqno. If we don't reset
-         * the semaphore value, then when the seqno moves backwards all
-         * future waits will complete instantly (causing rendering corruption).
-         */
-        if (IS_GEN_RANGE(dev_priv, 6, 7)) {
-                I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-                I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-                if (HAS_VEBOX(dev_priv))
-                        I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-        }
-
         intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
         clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
@@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
                 drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
         }
 
-        if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-                drm_printf(m, "\tSYNC_0: 0x%08x\n",
-                           I915_READ(RING_SYNC_0(engine->mmio_base)));
-                drm_printf(m, "\tSYNC_1: 0x%08x\n",
-                           I915_READ(RING_SYNC_1(engine->mmio_base)));
-                if (HAS_VEBOX(dev_priv))
-                        drm_printf(m, "\tSYNC_2: 0x%08x\n",
-                                   I915_READ(RING_SYNC_2(engine->mmio_base)));
-        }
-
         addr = intel_engine_get_active_head(engine);
         drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
                    upper_32_bits(addr), lower_32_bits(addr));
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 495fa145f37f..c3f929f59424 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -24,144 +24,6 @@
 
 #include "i915_drv.h"
 
-static bool
-ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
-{
-        ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
-        return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
-                         MI_SEMAPHORE_REGISTER);
-}
-
-static struct intel_engine_cs *
-semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
-                                 u64 offset)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
-        struct intel_engine_cs *signaller;
-        enum intel_engine_id id;
-
-        for_each_engine(signaller, dev_priv, id) {
-                if (engine == signaller)
-                        continue;
-
-                if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
-                        return signaller;
-        }
-
-        DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
-                         engine->name, ipehr);
-
-        return ERR_PTR(-ENODEV);
-}
-
-static struct intel_engine_cs *
-semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        void __iomem *vaddr;
-        u32 cmd, ipehr, head;
-        u64 offset = 0;
-        int i, backwards;
-
-        /*
-         * This function does not support execlist mode - any attempt to
-         * proceed further into this function will result in a kernel panic
-         * when dereferencing ring->buffer, which is not set up in execlist
-         * mode.
-         *
-         * The correct way of doing it would be to derive the currently
-         * executing ring buffer from the current context, which is derived
-         * from the currently running request. Unfortunately, to get the
-         * current request we would have to grab the struct_mutex before doing
-         * anything else, which would be ill-advised since some other thread
-         * might have grabbed it already and managed to hang itself, causing
-         * the hang checker to deadlock.
-         *
-         * Therefore, this function does not support execlist mode in its
-         * current form. Just return NULL and move on.
-         */
-        if (engine->buffer == NULL)
-                return NULL;
-
-        ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
-        if (!ipehr_is_semaphore_wait(engine, ipehr))
-                return NULL;
-
-        /*
-         * HEAD is likely pointing to the dword after the actual command,
-         * so scan backwards until we find the MBOX. But limit it to just 3
-         * or 4 dwords depending on the semaphore wait command size.
-         * Note that we don't care about ACTHD here since that might
-         * point at at batch, and semaphores are always emitted into the
-         * ringbuffer itself.
-         */
-        head = I915_READ_HEAD(engine) & HEAD_ADDR;
-        backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
-        vaddr = (void __iomem *)engine->buffer->vaddr;
-
-        for (i = backwards; i; --i) {
-                /*
-                 * Be paranoid and presume the hw has gone off into the wild -
-                 * our ring is smaller than what the hardware (and hence
-                 * HEAD_ADDR) allows. Also handles wrap-around.
-                 */
-                head &= engine->buffer->size - 1;
-
-                /* This here seems to blow up */
-                cmd = ioread32(vaddr + head);
-                if (cmd == ipehr)
-                        break;
-
-                head -= 4;
-        }
-
-        if (!i)
-                return NULL;
-
-        *seqno = ioread32(vaddr + head + 4) + 1;
-        return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
-}
-
-static int semaphore_passed(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        struct intel_engine_cs *signaller;
-        u32 seqno;
-
-        engine->hangcheck.deadlock++;
-
-        signaller = semaphore_waits_for(engine, &seqno);
-        if (signaller == NULL)
-                return -1;
-
-        if (IS_ERR(signaller))
-                return 0;
-
-        /* Prevent pathological recursion due to driver bugs */
-        if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
-                return -1;
-
-        if (intel_engine_signaled(signaller, seqno))
-                return 1;
-
-        /* cursory check for an unkickable deadlock */
-        if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
-            semaphore_passed(signaller) < 0)
-                return -1;
-
-        return 0;
-}
-
-static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
-{
-        struct intel_engine_cs *engine;
-        enum intel_engine_id id;
-
-        for_each_engine(engine, dev_priv, id)
-                engine->hangcheck.deadlock = 0;
-}
-
 static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
 {
         u32 tmp = current_instdone | *old_instdone;
@@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
                 return ENGINE_WAIT_KICK;
         }
 
-        if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
-                switch (semaphore_passed(engine)) {
-                default:
-                        return ENGINE_DEAD;
-                case 1:
-                        i915_handle_error(dev_priv, ALL_ENGINES, 0,
-                                          "stuck semaphore on %s",
-                                          engine->name);
-                        I915_WRITE_CTL(engine, tmp);
-                        return ENGINE_WAIT_KICK;
-                case 0:
-                        return ENGINE_WAIT;
-                }
-        }
-
         return ENGINE_DEAD;
 }
 
@@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
         for_each_engine(engine, dev_priv, id) {
                 struct intel_engine_hangcheck hc;
 
-                semaphore_clear_deadlocks(dev_priv);
-
                 hangcheck_load_sample(engine, &hc);
                 hangcheck_accumulate_sample(engine, &hc);
                 hangcheck_store_sample(engine, &hc);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1102c2e98222..588294a3bbd2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 
         intel_engine_reset_breadcrumbs(engine);
 
-        if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
-                I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-                I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-                if (HAS_VEBOX(dev_priv))
-                        I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-        }
-
         /* Enforce ordering by reading HEAD register back */
         I915_READ_HEAD(engine);
 
@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
         return 0;
 }
 
-static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
-{
-        struct drm_i915_private *dev_priv = rq->i915;
-        struct intel_engine_cs *engine;
-        enum intel_engine_id id;
-        int num_rings = 0;
-
-        for_each_engine(engine, dev_priv, id) {
-                i915_reg_t mbox_reg;
-
-                if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
-                        continue;
-
-                mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
-                if (i915_mmio_reg_valid(mbox_reg)) {
-                        *cs++ = MI_LOAD_REGISTER_IMM(1);
-                        *cs++ = i915_mmio_reg_offset(mbox_reg);
-                        *cs++ = rq->global_seqno;
-                        num_rings++;
-                }
-        }
-        if (num_rings & 1)
-                *cs++ = MI_NOOP;
-
-        return cs;
-}
-
 static void cancel_requests(struct intel_engine_cs *engine)
 {
         struct i915_request *request;
@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 static const int i9xx_emit_breadcrumb_sz = 4;
 
-static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-        return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
-}
-
-static int
-gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
-{
-        u32 dw1 = MI_SEMAPHORE_MBOX |
-                  MI_SEMAPHORE_COMPARE |
-                  MI_SEMAPHORE_REGISTER;
-        u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
-        u32 *cs;
-
-        WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
-
-        cs = intel_ring_begin(rq, 4);
-        if (IS_ERR(cs))
-                return PTR_ERR(cs);
-
-        *cs++ = dw1 | wait_mbox;
-        /* Throughout all of the GEM code, seqno passed implies our current
-         * seqno is >= the last seqno executed. However for hardware the
-         * comparison is strictly greater than.
-         */
-        *cs++ = signal->global_seqno - 1;
-        *cs++ = 0;
-        *cs++ = MI_NOOP;
-        intel_ring_advance(rq, cs);
-
-        return 0;
-}
-
 static void
 gen5_seqno_barrier(struct intel_engine_cs *engine)
 {
@@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
         return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
-                                       struct intel_engine_cs *engine)
-{
-        int i;
-
-        if (!HAS_LEGACY_SEMAPHORES(dev_priv))
-                return;
-
-        GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-        engine->semaphore.sync_to = gen6_ring_sync_to;
-        engine->semaphore.signal = gen6_signal;
-
-        /*
-         * The current semaphore is only applied on pre-gen8
-         * platform. And there is no VCS2 ring on the pre-gen8
-         * platform. So the semaphore between RCS and VCS2 is
-         * initialized as INVALID.
-         */
-        for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
-                static const struct {
-                        u32 wait_mbox;
-                        i915_reg_t mbox_reg;
-                } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
-                        [RCS_HW] = {
-                                [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-                                [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-                                [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-                        },
-                        [VCS_HW] = {
-                                [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-                                [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-                                [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-                        },
-                        [BCS_HW] = {
-                                [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-                                [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-                                [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-                        },
-                        [VECS_HW] = {
-                                [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-                                [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-                                [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-                        },
-                };
-                u32 wait_mbox;
-                i915_reg_t mbox_reg;
-
-                if (i == engine->hw_id) {
-                        wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-                        mbox_reg = GEN6_NOSYNC;
-                } else {
-                        wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
-                        mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
-                }
-
-                engine->semaphore.mbox.wait[i] = wait_mbox;
-                engine->semaphore.mbox.signal[i] = mbox_reg;
-        }
-}
-
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
                                 struct intel_engine_cs *engine)
 {
@@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
         GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
 
         intel_ring_init_irq(dev_priv, engine);
-        intel_ring_init_semaphores(dev_priv, engine);
 
         engine->init_hw = init_ring_common;
         engine->reset.prepare = reset_prepare;
@@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
         engine->emit_breadcrumb = i9xx_emit_breadcrumb;
         engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
-        if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-                int num_rings;
-
-                engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
-
-                num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-                engine->emit_breadcrumb_sz += num_rings * 3;
-                if (num_rings & 1)
-                        engine->emit_breadcrumb_sz++;
-        }
 
         engine->set_default_submission = i9xx_set_default_submission;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6b41b9ce5f5b..c927bdfb1ed0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -510,60 +510,6 @@ struct intel_engine_cs {
         void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
         void (*cleanup)(struct intel_engine_cs *engine);
 
-        /* GEN8 signal/wait table - never trust comments!
-         *        signal to     signal to     signal to     signal to     signal to
-         *          RCS           VCS           BCS          VECS          VCS2
-         *      --------------------------------------------------------------------
-         *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
-         *      |-------------------------------------------------------------------
-         *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
-         *      |-------------------------------------------------------------------
-         *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
-         *      |-------------------------------------------------------------------
-         * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
-         *      |-------------------------------------------------------------------
-         * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
-         *      |-------------------------------------------------------------------
-         *
-         * Generalization:
-         *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
-         *  ie. transpose of g(x, y)
-         *
-         *       sync from     sync from     sync from     sync from     sync from
-         *          RCS           VCS           BCS          VECS          VCS2
-         *      --------------------------------------------------------------------
-         *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
-         *      |-------------------------------------------------------------------
-         *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
-         *      |-------------------------------------------------------------------
-         *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
-         *      |-------------------------------------------------------------------
-         * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
-         *      |-------------------------------------------------------------------
-         * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
-         *      |-------------------------------------------------------------------
-         *
-         * Generalization:
-         *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
-         *  ie. transpose of f(x, y)
-         */
-        struct {
-#define GEN6_SEMAPHORE_LAST     VECS_HW
-#define GEN6_NUM_SEMAPHORES     (GEN6_SEMAPHORE_LAST + 1)
-#define GEN6_SEMAPHORES_MASK    GENMASK(GEN6_SEMAPHORE_LAST, 0)
-                struct {
-                        /* our mbox written by others */
-                        u32 wait[GEN6_NUM_SEMAPHORES];
-                        /* mboxes this ring signals to */
-                        i915_reg_t signal[GEN6_NUM_SEMAPHORES];
-                } mbox;
-
-                /* AKA wait() */
-                int (*sync_to)(struct i915_request *rq,
-                               struct i915_request *signal);
-                u32 *(*signal)(struct i915_request *rq, u32 *cs);
-        } semaphore;
-
         struct intel_engine_execlists execlists;
 
         /* Contexts are pinned whilst they are active on the GPU. The last
@@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
         return tail;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);