about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/i915/i915_request.c
diff options
context:
space:
mode:
author    Linus Torvalds <torvalds@linux-foundation.org> 2019-05-16 10:22:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-05-16 10:22:42 -0400
commit    cc7ce90153e74f8266eefee9fba466faa1a2d5df (patch)
tree      7ebac4bc27c2d400aca256c0b557c561540543e2 /drivers/gpu/drm/i915/i915_request.c
parent    83f3ef3de625a5766de2382f9e077d4daafd5bac (diff)
parent    8da0e1525b7f0d69c6cb44094963906282b32673 (diff)
Merge tag 'drm-next-2019-05-16' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie: "A bunch of fixes for the merge window closure, doesn't seem to be anything too major or serious in there. It does add TU117 turing modesetting to nouveau but it's just an enable for preexisting code. amdgpu: - gpu reset at load crash fix - ATPX hotplug fix for when dGPU is off - SR-IOV fixes radeon: - r5xx pll fixes i915: - GVT (MCHBAR, buffer alignment, misc warnings fixes) - Fixes for newly enabled semaphore code - Geminilake disable framebuffer compression - HSW edp fast modeset fix - IRQ vs RCU race fix nouveau: - Turing modesetting fixes - TU117 support msm: - SDM845 bringup fixes panfrost: - static checker fixes pl111: - spinlock init fix. bridge: - refresh rate register fix for adv7511" * tag 'drm-next-2019-05-16' of git://anongit.freedesktop.org/drm/drm: (36 commits) drm/msm: Upgrade gxpd checks to IS_ERR_OR_NULL drm/msm/dpu: Remove duplicate header drm/pl111: Initialize clock spinlock early drm/msm: correct attempted NULL pointer dereference in debugfs drm/msm: remove resv fields from msm_gem_object struct drm/nouveau: fix duplication of nv50_head_atom struct drm/nouveau/disp/dp: respect sink limits when selecting failsafe link configuration drm/nouveau/core: initial support for boards with TU117 chipset drm/nouveau/core: allow detected chipset to be overridden drm/nouveau/kms/gf119-gp10x: push HeadSetControlOutputResource() mthd when encoders change drm/nouveau/kms/nv50-: fix bug preventing non-vsync'd page flips drm/nouveau/kms/gv100-: fix spurious window immediate interlocks drm/bridge: adv7511: Fix low refresh rate selection drm/panfrost: Add missing _fini() calls in panfrost_device_fini() drm/panfrost: Only put sync_out if non-NULL drm/i915: Seal races between async GPU cancellation, retirement and signaling drm/i915: Fix fastset vs. 
pfit on/off on HSW EDP transcoder drm/i915/fbc: disable framebuffer compression on GeminiLake drm/amdgpu/psp: move psp version specific function pointers to early_init drm/radeon: prefer lower reference dividers ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c  60
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..f6c78c0fa74b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
425 if (i915_gem_context_is_banned(request->gem_context)) 425 if (i915_gem_context_is_banned(request->gem_context))
426 i915_request_skip(request, -EIO); 426 i915_request_skip(request, -EIO);
427 427
428 /*
429 * Are we using semaphores when the gpu is already saturated?
430 *
431 * Using semaphores incurs a cost in having the GPU poll a
432 * memory location, busywaiting for it to change. The continual
433 * memory reads can have a noticeable impact on the rest of the
434 * system with the extra bus traffic, stalling the cpu as it too
435 * tries to access memory across the bus (perf stat -e bus-cycles).
436 *
437 * If we installed a semaphore on this request and we only submit
438 * the request after the signaler completed, that indicates the
439 * system is overloaded and using semaphores at this time only
440 * increases the amount of work we are doing. If so, we disable
441 * further use of semaphores until we are idle again, whence we
442 * optimistically try again.
443 */
444 if (request->sched.semaphores &&
445 i915_sw_fence_signaled(&request->semaphore))
446 request->hw_context->saturated |= request->sched.semaphores;
447
428 /* We may be recursing from the signal callback of another i915 fence */ 448 /* We may be recursing from the signal callback of another i915 fence */
429 spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); 449 spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
430 450
@@ -432,6 +452,7 @@ void __i915_request_submit(struct i915_request *request)
432 set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); 452 set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
433 453
434 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && 454 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
455 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
435 !i915_request_enable_breadcrumb(request)) 456 !i915_request_enable_breadcrumb(request))
436 intel_engine_queue_breadcrumbs(engine); 457 intel_engine_queue_breadcrumbs(engine);
437 458
@@ -799,6 +820,39 @@ err_unreserve:
799} 820}
800 821
801static int 822static int
823i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
824{
825 if (list_is_first(&signal->ring_link, &signal->ring->request_list))
826 return 0;
827
828 signal = list_prev_entry(signal, ring_link);
829 if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
830 return 0;
831
832 return i915_sw_fence_await_dma_fence(&rq->submit,
833 &signal->fence, 0,
834 I915_FENCE_GFP);
835}
836
837static intel_engine_mask_t
838already_busywaiting(struct i915_request *rq)
839{
840 /*
841 * Polling a semaphore causes bus traffic, delaying other users of
842 * both the GPU and CPU. We want to limit the impact on others,
843 * while taking advantage of early submission to reduce GPU
844 * latency. Therefore we restrict ourselves to not using more
845 * than one semaphore from each source, and not using a semaphore
846 * if we have detected the engine is saturated (i.e. would not be
847 * submitted early and cause bus traffic reading an already passed
848 * semaphore).
849 *
850 * See the are-we-too-late? check in __i915_request_submit().
851 */
852 return rq->sched.semaphores | rq->hw_context->saturated;
853}
854
855static int
802emit_semaphore_wait(struct i915_request *to, 856emit_semaphore_wait(struct i915_request *to,
803 struct i915_request *from, 857 struct i915_request *from,
804 gfp_t gfp) 858 gfp_t gfp)
@@ -811,11 +865,15 @@ emit_semaphore_wait(struct i915_request *to,
811 GEM_BUG_ON(INTEL_GEN(to->i915) < 8); 865 GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
812 866
813 /* Just emit the first semaphore we see as request space is limited. */ 867 /* Just emit the first semaphore we see as request space is limited. */
814 if (to->sched.semaphores & from->engine->mask) 868 if (already_busywaiting(to) & from->engine->mask)
815 return i915_sw_fence_await_dma_fence(&to->submit, 869 return i915_sw_fence_await_dma_fence(&to->submit,
816 &from->fence, 0, 870 &from->fence, 0,
817 I915_FENCE_GFP); 871 I915_FENCE_GFP);
818 872
873 err = i915_request_await_start(to, from);
874 if (err < 0)
875 return err;
876
819 err = i915_sw_fence_await_dma_fence(&to->semaphore, 877 err = i915_sw_fence_await_dma_fence(&to->semaphore,
820 &from->fence, 0, 878 &from->fence, 0,
821 I915_FENCE_GFP); 879 I915_FENCE_GFP);