author    Oscar Mateo <oscar.mateo@intel.com>    2014-07-24 12:04:29 -0400
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2014-08-14 16:02:55 -0400
commit    48e29f5535b9eb506c44bd8f41bd9348fd219435 (patch)
tree      0dc24897e29bcccdf9965d78a938752b2c799712
parent    582d67f0b19afc2299bc8977aba835d8d25bb591 (diff)
drm/i915/bdw: Emission of requests with logical rings
On a previous iteration of this patch, I created an Execlists version of
__i915_add_request and abstracted it away as a vfunc. Daniel Vetter wondered
then why that was needed:

"with the clean split in command submission I expect every function to know
whether it'll submit to an lrc (everything in intel_lrc.c) or whether it'll
submit to a legacy ring (existing code), so I don't see a need for an
add_request vfunc."

The honest, hairy truth is that this patch is the glue keeping the whole
logical ring puzzle together:

- i915_add_request is used by intel_ring_idle, which in turn is used by
  i915_gpu_idle, which in turn is used in several places inside the eviction
  and gtt codes.
- Also, it is used by i915_gem_check_olr, which is littered all over
  i915_gem.c
- ...

If I were to duplicate all the code that directly or indirectly uses
__i915_add_request, I'd end up creating a separate driver.

To show the differences between the existing legacy version and the new
Execlists one, this time I have special-cased __i915_add_request instead of
adding an add_request vfunc. I hope this helps to untangle this Gordian knot.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
[danvet: Adjust to ringbuf->FIXME_lrc_ctx per the discussion with Thomas
Daniel.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c    72
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c   30
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.h    1
3 files changed, 80 insertions(+), 23 deletions(-)
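The pattern at the heart of this patch - picking the right ringbuffer at run
time depending on i915.enable_execlists, instead of dispatching through a
vfunc - boils down to a few lines. Below is a minimal standalone sketch of
that pattern; struct engine, struct context and the request_ringbuf() helper
are simplified stand-ins invented for illustration, not the real driver
structures:

#include <stdbool.h>
#include <stdio.h>

struct ringbuf { int tail; };

/* With execlists, each context owns one ringbuffer per engine. */
struct context { struct ringbuf *engine_ringbuf; };

struct engine {
	struct ringbuf *buffer;      /* legacy ring: one per engine */
	struct context *request_ctx; /* context of the pending request */
};

static bool enable_execlists = true; /* stand-in for i915.enable_execlists */

/* Special-casing instead of a vfunc: the same function serves both
 * submission modes and simply looks the ringbuffer up differently. */
static struct ringbuf *request_ringbuf(struct engine *ring)
{
	if (enable_execlists)
		return ring->request_ctx->engine_ringbuf;
	return ring->buffer;
}

int main(void)
{
	struct ringbuf lrc = { .tail = 128 }, legacy = { .tail = 64 };
	struct context ctx = { .engine_ringbuf = &lrc };
	struct engine rcs = { .buffer = &legacy, .request_ctx = &ctx };

	printf("tail = %d\n", request_ringbuf(&rcs)->tail); /* prints 128 */
	return 0;
}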
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2f0b886eb0..32fa1e9eb844 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2311,10 +2311,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
+	struct intel_ringbuffer *ringbuf;
 	u32 request_ring_position, request_start;
 	int ret;
 
-	request_start = intel_ring_get_tail(ring->buffer);
+	request = ring->preallocated_lazy_request;
+	if (WARN_ON(request == NULL))
+		return -ENOMEM;
+
+	if (i915.enable_execlists) {
+		struct intel_context *ctx = request->ctx;
+		ringbuf = ctx->engine[ring->id].ringbuf;
+	} else
+		ringbuf = ring->buffer;
+
+	request_start = intel_ring_get_tail(ringbuf);
 	/*
 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
 	 * after having emitted the batchbuffer command. Hence we need to fix
@@ -2322,24 +2333,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	ret = intel_ring_flush_all_caches(ring);
-	if (ret)
-		return ret;
-
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
-		return -ENOMEM;
+	if (i915.enable_execlists) {
+		ret = logical_ring_flush_all_caches(ringbuf);
+		if (ret)
+			return ret;
+	} else {
+		ret = intel_ring_flush_all_caches(ring);
+		if (ret)
+			return ret;
+	}
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	request_ring_position = intel_ring_get_tail(ring->buffer);
+	request_ring_position = intel_ring_get_tail(ringbuf);
 
-	ret = ring->add_request(ring);
-	if (ret)
-		return ret;
+	if (i915.enable_execlists) {
+		ret = ring->emit_request(ringbuf);
+		if (ret)
+			return ret;
+	} else {
+		ret = ring->add_request(ring);
+		if (ret)
+			return ret;
+	}
 
 	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
@@ -2354,12 +2373,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 */
 	request->batch_obj = obj;
 
-	/* Hold a reference to the current context so that we can inspect
-	 * it later in case a hangcheck error event fires.
-	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	if (!i915.enable_execlists) {
+		/* Hold a reference to the current context so that we can inspect
+		 * it later in case a hangcheck error event fires.
+		 */
+		request->ctx = ring->last_context;
+		if (request->ctx)
+			i915_gem_context_reference(request->ctx);
+	}
 
 	request->emitted_jiffies = jiffies;
 	list_add_tail(&request->list, &ring->request_list);
@@ -2614,6 +2635,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
+		struct intel_ringbuffer *ringbuf;
 
 		request = list_first_entry(&ring->request_list,
 					   struct drm_i915_gem_request,
@@ -2623,12 +2645,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 			break;
 
 		trace_i915_gem_request_retire(ring, request->seqno);
+
+		/* This is one of the few common intersection points
+		 * between legacy ringbuffer submission and execlists:
+		 * we need to tell them apart in order to find the correct
+		 * ringbuffer to which the request belongs to.
+		 */
+		if (i915.enable_execlists) {
+			struct intel_context *ctx = request->ctx;
+			ringbuf = ctx->engine[ring->id].ringbuf;
+		} else
+			ringbuf = ring->buffer;
+
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ringbuf->last_retired_head = request->tail;
 
 		i915_gem_free_request(request);
 	}
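The retire path above performs the same lookup in reverse: given a finished
request, it must first find the ringbuffer that request was emitted to
before it can update last_retired_head. A minimal sketch under the same
simplified assumptions (stub types and the retire_one() helper are
hypothetical, not the driver's real structures):

#include <stdbool.h>
#include <stdio.h>

struct ringbuf { int last_retired_head; };
struct context { struct ringbuf *engine_ringbuf; };
struct engine  { struct ringbuf *buffer; };
struct request { struct context *ctx; int tail; };

static bool enable_execlists = true;

/* Update the last known GPU head position on whichever ringbuffer
 * the retired request was emitted to. */
static void retire_one(struct engine *ring, struct request *req)
{
	struct ringbuf *ringbuf = enable_execlists ?
		req->ctx->engine_ringbuf : ring->buffer;

	ringbuf->last_retired_head = req->tail;
}

int main(void)
{
	struct ringbuf lrc = { .last_retired_head = -1 };
	struct context ctx = { .engine_ringbuf = &lrc };
	struct engine rcs = { .buffer = NULL };
	struct request req = { .ctx = &ctx, .tail = 256 };

	retire_one(&rcs, &req);
	printf("last_retired_head = %d\n", lrc.last_retired_head); /* 256 */
	return 0;
}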
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c2352d1b23fa..cd6ddd80e54c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -252,6 +252,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	int ret;
+
+	if (!ring->gpu_caches_dirty)
+		return 0;
+
+	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 {
 	intel_logical_ring_advance(ringbuf);
@@ -262,7 +278,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	/* TODO: how to submit a context to the ELSP is not here yet */
 }
 
-static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
+				    struct intel_context *ctx)
 {
 	if (ring->outstanding_lazy_seqno)
 		return 0;
@@ -274,6 +291,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
 		if (request == NULL)
 			return -ENOMEM;
 
+		/* Hold a reference to the context this request belongs to
+		 * (we will need it when the time comes to emit/retire the
+		 * request).
+		 */
+		request->ctx = ctx;
+		i915_gem_context_reference(request->ctx);
+
 		ring->preallocated_lazy_request = request;
 	}
 
@@ -312,8 +336,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
 	if (ret)
 		return ret;
 
-	/* TODO: make sure we update the right ringbuffer's last_retired_head
-	 * when retiring requests */
 	i915_gem_retire_requests_ring(ring);
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;
@@ -433,7 +455,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
 		return ret;
 
 	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
+	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
 	if (ret)
 		return ret;
 
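Note the ordering that logical_ring_alloc_seqno() establishes: the context
reference is taken as soon as the lazy request is preallocated, so that
request->ctx is guaranteed to be valid when the emit and retire paths later
use it to find the right ringbuffer. The matching unreference is not part of
this diff; the sketch below only illustrates the take-early/drop-at-free
pairing, with a plain counter and the hypothetical get_context()/put_context()
helpers standing in for i915_gem_context_reference()/unreference():

#include <stdio.h>
#include <stdlib.h>

struct context { int refcount; };
struct request { struct context *ctx; };

/* Stand-ins for the driver's context refcounting helpers. */
static void get_context(struct context *ctx) { ctx->refcount++; }
static void put_context(struct context *ctx) { ctx->refcount--; }

/* Allocation: take the reference as soon as the lazy request exists,
 * mirroring what logical_ring_alloc_seqno() does above. */
static struct request *alloc_request(struct context *ctx)
{
	struct request *req = malloc(sizeof(*req));
	if (!req)
		return NULL;
	req->ctx = ctx;
	get_context(req->ctx);
	return req;
}

/* Free: drop the reference only once nothing can look the
 * ringbuffer up through req->ctx anymore. */
static void free_request(struct request *req)
{
	put_context(req->ctx);
	free(req);
}

int main(void)
{
	struct context ctx = { .refcount = 1 };
	struct request *req = alloc_request(&ctx);

	if (!req)
		return 1;
	printf("refcount while pending: %d\n", ctx.refcount); /* 2 */
	free_request(req);
	printf("refcount after free:    %d\n", ctx.refcount); /* 1 */
	return 0;
}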
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4e032875c1fd..460e1af15600 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
 
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
 void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {