author:    Chris Wilson <chris@chris-wilson.co.uk>  2019-01-28 13:18:11 -0500
committer: Chris Wilson <chris@chris-wilson.co.uk>  2019-01-28 14:07:09 -0500
commit:    5013eb8cd601c31e6d7d1b9d3291b24e933b77b2
tree:      1078ed4c54d8457b3768ee71422970144c05c053 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent:    8ba306a6a362ef6f3c005ec8819c8890a6fadcd1
drm/i915: Track the context's seqno in its own timeline HWSP
Now that we have allocated ourselves a cacheline to store a breadcrumb, we can emit a write from the GPU into the timeline's HWSP of the per-context seqno as we complete each request. This drops the mirroring of the per-engine HWSP and allows each context to operate independently. We do not need to unwind the per-context timeline, and so requests are always consistent with the timeline breadcrumb, greatly simplifying the completion checks as we no longer need to be concerned about the global_seqno changing mid-check.

One complication, though, is that we have to be wary that the request may outlive the HWSP, and so avoid touching the potentially dangling pointer after we have retired the fence. We also have to guard our access of the HWSP with RCU; the release of the obj->mm.pages should already be RCU-safe.

At this point, we are emitting both per-context and global seqnos and still using the single per-engine execution timeline for resolving interrupts.

v2: s/fake_complete/mark_complete/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-5-chris@chris-wilson.co.uk
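To make the simplified completion check concrete, here is a minimal sketch (not code from this patch; the helper names and the way the HWSP pointer is obtained are illustrative assumptions): the GPU writes rq->fence.seqno into the timeline's HWSP cacheline, so the CPU can decide completion by comparing the value read back against the request's own seqno with the usual wrap-safe signed comparison, without consulting the engine's global_seqno.

```c
#include <linux/compiler.h>	/* READ_ONCE() */
#include <linux/types.h>

/* Wrap-safe seqno comparison: true once @seq has reached @target. */
static inline bool seqno_passed(u32 seq, u32 target)
{
	return (s32)(seq - target) >= 0;
}

/*
 * Sketch of a per-context completion check against the timeline HWSP.
 * @hwsp points at the seqno slot in the context's timeline HWSP cacheline
 * (hypothetical plumbing for illustration); @fence_seqno is rq->fence.seqno.
 * The caller must keep the HWSP page alive across the read (e.g. under RCU,
 * as the commit message notes), since the request may outlive its HWSP.
 */
static inline bool context_breadcrumb_passed(const u32 *hwsp, u32 fence_seqno)
{
	return seqno_passed(READ_ONCE(*hwsp), fence_seqno);
}
```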
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  87
1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 174795622eb1..ee3719324e2d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -326,6 +326,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_DC_FLUSH_ENABLE |
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_CS_STALL);
+	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->global_seqno;
 
@@ -427,6 +432,13 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 		 PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_GLOBAL_GTT_IVB |
 		 PIPE_CONTROL_CS_STALL);
+	*cs++ = rq->timeline->hwsp_offset;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_GLOBAL_GTT_IVB |
+		 PIPE_CONTROL_CS_STALL);
 	*cs++ = intel_hws_seqno_address(rq->engine);
 	*cs++ = rq->global_seqno;
 
@@ -441,10 +453,19 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
-	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
+
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -457,14 +478,21 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
 	int i;
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
-	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
 
 	for (i = 0; i < GEN7_XCS_WA; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_INDEX_ADDR;
-		*cs++ = rq->global_seqno;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
 	}
 
 	*cs++ = MI_FLUSH_DW;
@@ -472,7 +500,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -738,7 +765,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	rq = NULL;
 	spin_lock_irqsave(&tl->lock, flags);
 	list_for_each_entry(pos, &tl->requests, link) {
-		if (!__i915_request_completed(pos, pos->global_seqno)) {
+		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
 		}
@@ -880,10 +907,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	list_for_each_entry(request, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!request->global_seqno);
 
-		if (i915_request_signaled(request))
-			continue;
+		if (!i915_request_signaled(request))
+			dma_fence_set_error(&request->fence, -EIO);
 
-		dma_fence_set_error(&request->fence, -EIO);
+		i915_request_mark_complete(request);
 	}
 
 	intel_write_status_page(engine,
@@ -907,14 +934,20 @@ static void i9xx_submit_request(struct i915_request *request)
 
 static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
 	*cs++ = MI_FLUSH;
 
 	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
+	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -927,8 +960,15 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
 	int i;
 
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
 	*cs++ = MI_FLUSH;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
@@ -937,6 +977,7 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -1169,6 +1210,10 @@ int intel_ring_pin(struct intel_ring *ring)
 
 	GEM_BUG_ON(ring->vaddr);
 
+	ret = i915_timeline_pin(ring->timeline);
+	if (ret)
+		return ret;
+
 	flags = PIN_GLOBAL;
 
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@@ -1185,28 +1230,32 @@ int intel_ring_pin(struct intel_ring *ring)
 		else
 			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
 		if (unlikely(ret))
-			return ret;
+			goto unpin_timeline;
 	}
 
 	ret = i915_vma_pin(vma, 0, 0, flags);
 	if (unlikely(ret))
-		return ret;
+		goto unpin_timeline;
 
 	if (i915_vma_is_map_and_fenceable(vma))
 		addr = (void __force *)i915_vma_pin_iomap(vma);
 	else
 		addr = i915_gem_object_pin_map(vma->obj, map);
-	if (IS_ERR(addr))
-		goto err;
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto unpin_ring;
+	}
 
 	vma->obj->pin_global++;
 
 	ring->vaddr = addr;
 	return 0;
 
-err:
+unpin_ring:
 	i915_vma_unpin(vma);
-	return PTR_ERR(addr);
+unpin_timeline:
+	i915_timeline_unpin(ring->timeline);
+	return ret;
 }
 
 void intel_ring_reset(struct intel_ring *ring, u32 tail)
@@ -1235,6 +1284,8 @@ void intel_ring_unpin(struct intel_ring *ring)
 
 	ring->vma->obj->pin_global--;
 	i915_vma_unpin(ring->vma);
+
+	i915_timeline_unpin(ring->timeline);
 }
 
 static struct i915_vma *