Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_request.c | 60
 1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..f6c78c0fa74b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
+	/*
+	 * Are we using semaphores when the gpu is already saturated?
+	 *
+	 * Using semaphores incurs a cost in having the GPU poll a
+	 * memory location, busywaiting for it to change. The continual
+	 * memory reads can have a noticeable impact on the rest of the
+	 * system with the extra bus traffic, stalling the cpu as it too
+	 * tries to access memory across the bus (perf stat -e bus-cycles).
+	 *
+	 * If we installed a semaphore on this request and we only submit
+	 * the request after the signaler completed, that indicates the
+	 * system is overloaded and using semaphores at this time only
+	 * increases the amount of work we are doing. If so, we disable
+	 * further use of semaphores until we are idle again, whence we
+	 * optimistically try again.
+	 */
+	if (request->sched.semaphores &&
+	    i915_sw_fence_signaled(&request->semaphore))
+		request->hw_context->saturated |= request->sched.semaphores;
+
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
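The hunk above reduces the saturation heuristic to two bitmask operations: at submit time, record the engines whose semaphores ran uselessly late, and consult that record before emitting another semaphore (see already_busywaiting() further down). A minimal standalone sketch of the idea, using simplified toy_* stand-ins rather than the driver's real types:

    /* Illustrative sketch only; the toy_* types stand in for the i915
     * request/context structs and are not the driver's code. */
    #include <stdbool.h>

    typedef unsigned int engine_mask_t;

    struct toy_context {
            engine_mask_t saturated;   /* engines whose semaphores ran late */
    };

    struct toy_request {
            struct toy_context *ctx;
            engine_mask_t semaphores;  /* engines this request busywaits on */
            bool signaler_done;        /* signaler finished before we submitted */
    };

    /* At submit: a semaphore whose signaler has already finished bought us
     * nothing, so mark those engines as saturated for this context. */
    static void note_saturation(struct toy_request *rq)
    {
            if (rq->semaphores && rq->signaler_done)
                    rq->ctx->saturated |= rq->semaphores;
    }

Per the comment in the hunk, the real mask stays set until the context idles, at which point semaphores are optimistically retried.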
@@ -432,6 +452,7 @@ void __i915_request_submit(struct i915_request *request)
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
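The one-line addition in this hunk stops us from arming the breadcrumb interrupt for a fence that has already signaled. As a sketch, the predicate now reads roughly as below; needs_breadcrumb() is a hypothetical helper for illustration, while test_bit() and the flag names are the kernel's own:

    /* Hypothetical helper showing the check order; not driver code. */
    static bool needs_breadcrumb(const struct dma_fence *fence)
    {
            return test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags) &&
                   !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
    }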
@@ -799,6 +820,39 @@ err_unreserve:
 }
 
 static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+	/*
+	 * Polling a semaphore causes bus traffic, delaying other users of
+	 * both the GPU and CPU. We want to limit the impact on others,
+	 * while taking advantage of early submission to reduce GPU
+	 * latency. Therefore we restrict ourselves to not using more
+	 * than one semaphore from each source, and not using a semaphore
+	 * if we have detected the engine is saturated (i.e. would not be
+	 * submitted early and cause bus traffic reading an already passed
+	 * semaphore).
+	 *
+	 * See the are-we-too-late? check in __i915_request_submit().
+	 */
+	return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
+static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
 		    gfp_t gfp)
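i915_request_await_start() above keys off the signaler's position in its ring: unless the signaler is the oldest request on that ring, our submission is gated on the request queued immediately before it, so by the time our semaphore begins polling, the signaler itself must already be executing. A rough sketch of that rule, again with illustrative toy_* stand-ins rather than the driver's types:

    /* Illustrative sketch of the await-start rule; not driver code. */
    struct toy_request {
            struct toy_request *ring_prev; /* previous request on the ring */
            unsigned int seqno;
    };

    struct toy_timeline {
            unsigned int synced_until;     /* newest seqno already awaited */
    };

    /* Stands in for i915_sw_fence_await_dma_fence() on rq->submit. */
    static int toy_await(struct toy_timeline *tl, struct toy_request *rq)
    {
            if (rq->seqno > tl->synced_until)
                    tl->synced_until = rq->seqno;
            return 0;
    }

    static int toy_await_start(struct toy_timeline *tl,
                               struct toy_request *signal)
    {
            struct toy_request *prev = signal->ring_prev;

            /* Oldest on its ring: the signaler starts as soon as possible. */
            if (!prev)
                    return 0;

            /* Already ordered after the predecessor: nothing more to do. */
            if (prev->seqno <= tl->synced_until)
                    return 0;

            return toy_await(tl, prev);
    }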
@@ -811,11 +865,15 @@ emit_semaphore_wait(struct i915_request *to,
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
 	/* Just emit the first semaphore we see as request space is limited. */
-	if (to->sched.semaphores & from->engine->mask)
+	if (already_busywaiting(to) & from->engine->mask)
 		return i915_sw_fence_await_dma_fence(&to->submit,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
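Taken together, the patched emit_semaphore_wait() degrades gracefully: a busywait is emitted only when neither this request nor its context has been seen saturating the signaler's engine, and even then only after ordering behind the signaler's start. A hypothetical summary of that cascade (all names are stand-ins, not the driver's):

    typedef unsigned int engine_mask_t;

    enum wait_kind { WAIT_FENCE, WAIT_SEMAPHORE };

    /* At most one semaphore per source engine, and none on engines already
     * marked saturated; otherwise prefer the GPU-side busywait. */
    static enum wait_kind choose_wait(engine_mask_t busywaiting,
                                      engine_mask_t saturated,
                                      engine_mask_t signaler)
    {
            if ((busywaiting | saturated) & signaler)
                    return WAIT_FENCE;     /* interrupt-driven, no GPU poll */
            return WAIT_SEMAPHORE;         /* GPU poll for lower latency */
    }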