author     Oscar Mateo <oscar.mateo@intel.com>      2014-07-24 12:04:29 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>   2014-08-14 16:02:55 -0400
commit     48e29f5535b9eb506c44bd8f41bd9348fd219435
tree       0dc24897e29bcccdf9965d78a938752b2c799712
parent     582d67f0b19afc2299bc8977aba835d8d25bb591
drm/i915/bdw: Emission of requests with logical rings
On a previous iteration of this patch, I created an Execlists
version of __i915_add_request and abstracted it away as a
vfunc. Daniel Vetter wondered then why that was needed:

"with the clean split in command submission I expect every
function to know wether it'll submit to an lrc (everything in
intel_lrc.c) or wether it'll submit to a legacy ring (existing
code), so I don't see a need for an add_request vfunc."

The honest, hairy truth is that this patch is the glue keeping
the whole logical ring puzzle together:

- i915_add_request is used by intel_ring_idle, which in turn is
  used by i915_gpu_idle, which in turn is used in several places
  inside the eviction and gtt code.
- Also, it is used by i915_gem_check_olr, which is littered all
  over i915_gem.c.
- ...

If I were to duplicate all the code that directly or indirectly
uses __i915_add_request, I would end up creating a separate driver.

To show the differences between the existing legacy version and
the new Execlists one, this time I have special-cased
__i915_add_request instead of adding an add_request vfunc. I
hope this helps to untangle this Gordian knot.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
[danvet: Adjust to ringbuf->FIXME_lrc_ctx per the discussion with
Thomas Daniel.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
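
As a rough, self-contained C sketch (not i915 code) of the special-casing the
message describes: one shared submission helper branches on the execlists
toggle to pick the right ringbuffer, instead of hiding the split behind a
vfunc. All names here (fake_engine, fake_ringbuf, pick_ringbuf, and
enable_execlists as a plain bool) are hypothetical stand-ins; the real logic
lives in __i915_add_request in the diff below.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins; none of these are real i915 symbols. */
static bool enable_execlists = true;    /* models the i915.enable_execlists parameter */

struct fake_ringbuf { unsigned int tail; };

struct fake_engine {
        struct fake_ringbuf *buffer;       /* legacy: one global ringbuffer per engine */
        struct fake_ringbuf *lrc_ringbuf;  /* execlists: per-context ringbuffer */
};

/* The chosen design: special-case the shared function in place,
 * rather than splitting it behind an add_request vfunc. */
static struct fake_ringbuf *pick_ringbuf(struct fake_engine *engine)
{
        if (enable_execlists)
                return engine->lrc_ringbuf;  /* found via request->ctx in the real code */
        return engine->buffer;
}

int main(void)
{
        struct fake_ringbuf legacy = { 0 }, lrc = { 0 };
        struct fake_engine engine = { &legacy, &lrc };

        printf("submitting via %s ringbuffer\n",
               pick_ringbuf(&engine) == &lrc ? "execlists" : "legacy");
        return 0;
}

The cost of this design is a pair of if (i915.enable_execlists) ladders in
shared code; the benefit, per the message above, is not duplicating everything
that reaches __i915_add_request.
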
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  | 72
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 30
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.h |  1
3 files changed, 80 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2f0b886eb0..32fa1e9eb844 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2311,10 +2311,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
 {
         struct drm_i915_private *dev_priv = ring->dev->dev_private;
         struct drm_i915_gem_request *request;
+        struct intel_ringbuffer *ringbuf;
         u32 request_ring_position, request_start;
         int ret;
 
-        request_start = intel_ring_get_tail(ring->buffer);
+        request = ring->preallocated_lazy_request;
+        if (WARN_ON(request == NULL))
+                return -ENOMEM;
+
+        if (i915.enable_execlists) {
+                struct intel_context *ctx = request->ctx;
+                ringbuf = ctx->engine[ring->id].ringbuf;
+        } else
+                ringbuf = ring->buffer;
+
+        request_start = intel_ring_get_tail(ringbuf);
         /*
          * Emit any outstanding flushes - execbuf can fail to emit the flush
          * after having emitted the batchbuffer command. Hence we need to fix
@@ -2322,24 +2333,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
          * is that the flush _must_ happen before the next request, no matter
          * what.
          */
-        ret = intel_ring_flush_all_caches(ring);
-        if (ret)
-                return ret;
-
-        request = ring->preallocated_lazy_request;
-        if (WARN_ON(request == NULL))
-                return -ENOMEM;
+        if (i915.enable_execlists) {
+                ret = logical_ring_flush_all_caches(ringbuf);
+                if (ret)
+                        return ret;
+        } else {
+                ret = intel_ring_flush_all_caches(ring);
+                if (ret)
+                        return ret;
+        }
 
         /* Record the position of the start of the request so that
          * should we detect the updated seqno part-way through the
          * GPU processing the request, we never over-estimate the
          * position of the head.
          */
-        request_ring_position = intel_ring_get_tail(ring->buffer);
+        request_ring_position = intel_ring_get_tail(ringbuf);
 
-        ret = ring->add_request(ring);
-        if (ret)
-                return ret;
+        if (i915.enable_execlists) {
+                ret = ring->emit_request(ringbuf);
+                if (ret)
+                        return ret;
+        } else {
+                ret = ring->add_request(ring);
+                if (ret)
+                        return ret;
+        }
 
         request->seqno = intel_ring_get_seqno(ring);
         request->ring = ring;
@@ -2354,12 +2373,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
          */
         request->batch_obj = obj;
 
-        /* Hold a reference to the current context so that we can inspect
-         * it later in case a hangcheck error event fires.
-         */
-        request->ctx = ring->last_context;
-        if (request->ctx)
-                i915_gem_context_reference(request->ctx);
+        if (!i915.enable_execlists) {
+                /* Hold a reference to the current context so that we can inspect
+                 * it later in case a hangcheck error event fires.
+                 */
+                request->ctx = ring->last_context;
+                if (request->ctx)
+                        i915_gem_context_reference(request->ctx);
+        }
 
         request->emitted_jiffies = jiffies;
         list_add_tail(&request->list, &ring->request_list);
@@ -2614,6 +2635,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
         while (!list_empty(&ring->request_list)) {
                 struct drm_i915_gem_request *request;
+                struct intel_ringbuffer *ringbuf;
 
                 request = list_first_entry(&ring->request_list,
                                            struct drm_i915_gem_request,
@@ -2623,12 +2645,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
                         break;
 
                 trace_i915_gem_request_retire(ring, request->seqno);
+
+                /* This is one of the few common intersection points
+                 * between legacy ringbuffer submission and execlists:
+                 * we need to tell them apart in order to find the correct
+                 * ringbuffer to which the request belongs to.
+                 */
+                if (i915.enable_execlists) {
+                        struct intel_context *ctx = request->ctx;
+                        ringbuf = ctx->engine[ring->id].ringbuf;
+                } else
+                        ringbuf = ring->buffer;
+
                 /* We know the GPU must have read the request to have
                  * sent us the seqno + interrupt, so use the position
                  * of tail of the request to update the last known position
                  * of the GPU head.
                  */
-                ring->buffer->last_retired_head = request->tail;
+                ringbuf->last_retired_head = request->tail;
 
                 i915_gem_free_request(request);
         }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c2352d1b23fa..cd6ddd80e54c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -252,6 +252,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
         I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
+{
+        struct intel_engine_cs *ring = ringbuf->ring;
+        int ret;
+
+        if (!ring->gpu_caches_dirty)
+                return 0;
+
+        ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
+        if (ret)
+                return ret;
+
+        ring->gpu_caches_dirty = false;
+        return 0;
+}
+
 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 {
         intel_logical_ring_advance(ringbuf);
@@ -262,7 +278,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
         /* TODO: how to submit a context to the ELSP is not here yet */
 }
 
-static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
+                                    struct intel_context *ctx)
 {
         if (ring->outstanding_lazy_seqno)
                 return 0;
@@ -274,6 +291,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
         if (request == NULL)
                 return -ENOMEM;
 
+        /* Hold a reference to the context this request belongs to
+         * (we will need it when the time comes to emit/retire the
+         * request).
+         */
+        request->ctx = ctx;
+        i915_gem_context_reference(request->ctx);
+
         ring->preallocated_lazy_request = request;
 }
 
@@ -312,8 +336,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
         if (ret)
                 return ret;
 
-        /* TODO: make sure we update the right ringbuffer's last_retired_head
-         * when retiring requests */
         i915_gem_retire_requests_ring(ring);
         ringbuf->head = ringbuf->last_retired_head;
         ringbuf->last_retired_head = -1;
@@ -433,7 +455,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
                 return ret;
 
         /* Preallocate the olr before touching the ring */
-        ret = logical_ring_alloc_seqno(ring);
+        ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
         if (ret)
                 return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4e032875c1fd..460e1af15600 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
 
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {