author     Chris Wilson <chris@chris-wilson.co.uk>    2019-01-28 13:18:07 -0500
committer  Chris Wilson <chris@chris-wilson.co.uk>    2019-01-28 14:06:59 -0500
commit     3adac4689f58cb3fb666d92dff0ee73cc97d24d7 (patch)
tree       90bc29f50d97398999529cfd070c7944ec89f9c1
parent     1e345568e3b541e19202caadae8d2cb2237e7ed8 (diff)
drm/i915: Introduce concept of per-timeline (context) HWSP
Supplement the per-engine HWSP with a per-timeline HWSP. That is a
per-request pointer through which we can check a local seqno,
abstracting away the presumption of a global seqno. In this first step,
we point each request back into the engine's HWSP so everything
continues to work with the global timeline.
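In other words, each request keeps a pointer to the breadcrumb slot it
should watch, and "completed" becomes a wrap-safe comparison of that value
against the request's own seqno. A minimal, stand-alone sketch of the idea
(simplified, hypothetical names; the real code reads the slot with
READ_ONCE() under rcu_read_lock(), see hwsp_seqno() in the patch below):

#include <stdint.h>
#include <stdbool.h>

/* Simplified stand-in for struct i915_request (illustration only). */
struct request {
	uint32_t seqno;             /* breadcrumb this request will write */
	const uint32_t *hwsp_seqno; /* slot in the (per-timeline) HW status page */
};

/* Wrap-safe "a is at or after b", mirroring i915_seqno_passed(). */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/* The request is complete once the value visible through its HWSP pointer
 * has reached the request's own breadcrumb.
 */
static bool request_completed(const struct request *rq)
{
	return seqno_passed(*rq->hwsp_seqno, rq->seqno);
}

With the pointer aimed at the engine's HWSP slot, as in this patch, the
check behaves just like the old global check; only the indirection changes.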
v2: s/i915_request_hwsp/hwsp_seqno/ to emphasise that this is the current
HW value and that we are accessing it via i915_request merely as a
convenience.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-1-chris@chris-wilson.co.uk
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c | 16
-rw-r--r--  drivers/gpu/drm/i915/i915_request.h | 45
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c    |  9
3 files changed, 55 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f4241a17e2ad..a076fd0b7ba6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_request(struct intel_engine_cs *engine,
				     struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
		  __func__, engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
+		  hwsp_seqno(rq),
		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!i915_request_completed(rq));
@@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request)
 {
 	struct i915_gem_active *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
		  request->engine->name,
		  request->fence.context, request->fence.seqno,
		  request->global_seqno,
+		  hwsp_seqno(request),
		  intel_engine_get_seqno(request->engine));
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
+		  hwsp_seqno(rq),
		  intel_engine_get_seqno(rq->engine));
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
@@ -355,10 +358,11 @@ void __i915_request_submit(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	u32 seqno;
 
-	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  engine->timeline.seqno + 1,
+		  hwsp_seqno(request),
		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -405,10 +409,11 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  request->global_seqno,
+		  hwsp_seqno(request),
		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -616,6 +621,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->ring = ce->ring;
 	rq->timeline = ce->ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
 
 	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c0f084ca4f29..ade010fe6e26 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -130,6 +130,13 @@ struct i915_request {
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
 
+	/*
+	 * A convenience pointer to the current breadcrumb value stored in
+	 * the HW status page (or our timeline's local equivalent). The full
+	 * path would be rq->hw_context->ring->timeline->hwsp_seqno.
+	 */
+	const u32 *hwsp_seqno;
+
 	/**
	 * GEM sequence number associated with this request on the
	 * global execution timeline. It is zero when the request is not
@@ -285,11 +292,6 @@ static inline bool i915_request_signaled(const struct i915_request *rq)
 	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
 }
 
-static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
-					    u32 seqno);
-static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
-					      u32 seqno);
-
 /**
  * Returns true if seq1 is later than seq2.
  */
@@ -298,6 +300,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 	return (s32)(seq1 - seq2) >= 0;
 }
 
+static inline u32 __hwsp_seqno(const struct i915_request *rq)
+{
+	return READ_ONCE(*rq->hwsp_seqno);
+}
+
+/**
+ * hwsp_seqno - the current breadcrumb value in the HW status page
+ * @rq: the request, to chase the relevant HW status page
+ *
+ * The emphasis in naming here is that hwsp_seqno() is not a property of the
+ * request, but an indication of the current HW state (associated with this
+ * request). Its value will change as the GPU executes more requests.
+ *
+ * Returns the current breadcrumb value in the associated HW status page (or
+ * the local timeline's equivalent) for this request. The request itself
+ * has the associated breadcrumb value of rq->fence.seqno, when the HW
+ * status page has that breadcrumb or later, this request is complete.
+ */
+static inline u32 hwsp_seqno(const struct i915_request *rq)
+{
+	u32 seqno;
+
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	seqno = __hwsp_seqno(rq);
+	rcu_read_unlock();
+
+	return seqno;
+}
+
 /**
  * i915_request_started - check if the request has begun being executed
  * @rq: the request
@@ -315,14 +346,14 @@ static inline bool i915_request_started(const struct i915_request *rq)
 	if (!seqno) /* not yet submitted to HW */
		return false;
 
-	return intel_engine_has_started(rq->engine, seqno);
+	return i915_seqno_passed(hwsp_seqno(rq), seqno - 1);
 }
 
 static inline bool
 __i915_request_completed(const struct i915_request *rq, u32 seqno)
 {
 	GEM_BUG_ON(!seqno);
-	return intel_engine_has_completed(rq->engine, seqno) &&
+	return i915_seqno_passed(hwsp_seqno(rq), seqno) &&
	       seqno == i915_request_global_seqno(rq);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2cf99c436658..9ae7f77293a0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -446,11 +446,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+			GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
				  engine->name, n,
				  port[n].context_id, count,
				  rq->global_seqno,
				  rq->fence.context, rq->fence.seqno,
+				  hwsp_seqno(rq),
				  intel_engine_get_seqno(engine),
				  rq_prio(rq));
 		} else {
@@ -742,11 +743,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
			  rq->engine->name,
			  (unsigned int)(port - execlists->port),
			  rq->global_seqno,
			  rq->fence.context, rq->fence.seqno,
+			  hwsp_seqno(rq),
			  intel_engine_get_seqno(rq->engine));
 
 		GEM_BUG_ON(!execlists->active);
@@ -970,12 +972,13 @@ static void process_csb(struct intel_engine_cs *engine)
						EXECLISTS_ACTIVE_USER));
 
 		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
			  engine->name,
			  port->context_id, count,
			  rq ? rq->global_seqno : 0,
			  rq ? rq->fence.context : 0,
			  rq ? rq->fence.seqno : 0,
+			  rq ? hwsp_seqno(rq) : 0,
			  intel_engine_get_seqno(engine),
			  rq ? rq_prio(rq) : 0);
 