author	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-04 09:08:46 -0400
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-04 10:40:38 -0400
commit	5e5655c32de83a0151de0c4993d7783c22b6f9b4 (patch)
tree	3436e783ba56065ce254fc3ac3c4de9582fab0be /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	95aebcb2da73079f9ecb7f4e353af71ff1f04c05 (diff)
drm/i915: Micro-optimise hotpath through intel_ring_begin()
Typically, there is space available within the ring and if not we have
to wait (by definition a slow path). Rearrange the code to reduce the
number of branches and stack size for the hotpath, accommodating a slight
growth for the wait. (A sketch of the flag-packing trick this relies on
follows the tags below.)
v2: Fix the new assert that packets are not larger than the actual ring.
v3: Make the parameters unsigned as well.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-3-chris@chris-wilson.co.uk
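The micro-optimisation worth calling out is how the patch folds the old
bool need_wrap into the low bit of the wrap length itself: the ring's
emit offset only ever advances in 4-byte dwords, so the tail length
(ring->size - ring->emit) always has bit 0 clear, and "remain_actual | 1"
stays non-zero (hence truthy) even when the tail is empty. Below is a
minimal standalone sketch of that encoding, with hypothetical names; it
illustrates the trick and is not the i915 code.

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Pack "a wrap is needed" into bit 0 of the dword-aligned tail length. */
static unsigned int encode_wrap(unsigned int tail_bytes)
{
	assert((tail_bytes & 3) == 0);	/* always dword-aligned */
	return tail_bytes | 1;		/* non-zero even for an empty tail */
}

int main(void)
{
	unsigned char ring[64];
	unsigned int emit = 56;		/* current write offset in the ring */
	unsigned int need_wrap = 0;

	/* Pretend the next packet does not fit in the 8 tail bytes. */
	need_wrap = encode_wrap(sizeof(ring) - emit);

	if (need_wrap) {		/* truthy iff a wrap was requested */
		need_wrap &= ~1u;	/* recover the exact tail length */
		/* MI_NOOP encodes as 0, so a memset() noop-fills the tail. */
		memset(ring + emit, 0, need_wrap);
		emit = 0;
	}

	printf("filled %u tail bytes, emit reset to %u\n", need_wrap, emit);
	return 0;
}

Carrying the predicate and the length in one unsigned int is what lets
the rewritten function drop a bool and a local from the hot path, which
is presumably where the stack-size saving in the commit message comes
from.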
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	67
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b308e73fcfae..acd1da9b62a3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
+static noinline int wait_for_space(struct drm_i915_gem_request *req,
+				   unsigned int bytes)
 {
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_gem_request *target;
@@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	return 0;
 }
 
-u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
+u32 *intel_ring_begin(struct drm_i915_gem_request *req,
+		      unsigned int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->emit;
-	int remain_usable = ring->effective_size - ring->emit;
-	int bytes = num_dwords * sizeof(u32);
-	int total_bytes, wait_bytes;
-	bool need_wrap = false;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
 	u32 *cs;
 
 	total_bytes = bytes + req->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
 
-	if (unlikely(bytes > remain_usable)) {
-		/*
-		 * Not enough space for the basic request. So need to flush
-		 * out the remainder and then wait for base + reserved.
-		 */
-		wait_bytes = remain_actual + total_bytes;
-		need_wrap = true;
-	} else if (unlikely(total_bytes > remain_usable)) {
-		/*
-		 * The base request will fit but the reserved space
-		 * falls off the end. So we don't need an immediate wrap
-		 * and only need to effectively wait for the reserved
-		 * size space from the start of ringbuffer.
-		 */
-		wait_bytes = remain_actual + req->reserved_space;
-	} else {
-		/* No wrapping required, just waiting. */
-		wait_bytes = total_bytes;
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = req->reserved_space + remain_actual;
+		}
 	}
 
-	if (wait_bytes > ring->space) {
-		int ret = wait_for_space(req, wait_bytes);
+	if (unlikely(total_bytes > ring->space)) {
+		int ret = wait_for_space(req, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
 
 	if (unlikely(need_wrap)) {
-		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		memset(ring->vaddr + ring->emit, 0, need_wrap);
 		ring->emit = 0;
-		ring->space -= remain_actual;
+		ring->space -= need_wrap;
 	}
 
 	GEM_BUG_ON(ring->emit > ring->size - bytes);
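To make the structural change easier to see than the interleaved diff
allows, here is a condensed before/after of the wait-size computation,
with hypothetical names and the driver types stripped; this is a sketch
of the control flow only, not the driver code. Before, every call walked
an if/else-if/else chain to pick the wait size; after, the common
everything-fits case falls straight through a single predicted-not-taken
branch, and remain_actual is only computed on the rare path.

#include <stdio.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)	/* GCC/Clang builtin */

/* Before: three-way selection runs on every call. */
static unsigned int wait_bytes_old(unsigned int bytes, unsigned int reserved,
				   unsigned int usable, unsigned int actual)
{
	unsigned int total = bytes + reserved;

	if (unlikely(bytes > usable))
		return actual + total;		/* wrap now, wait for both */
	else if (unlikely(total > usable))
		return actual + reserved;	/* wait only; wrap comes later */
	else
		return total;			/* common case */
}

/* After: the common case skips the whole block with one branch. */
static unsigned int wait_bytes_new(unsigned int bytes, unsigned int reserved,
				   unsigned int usable, unsigned int actual)
{
	unsigned int total = bytes + reserved;

	if (unlikely(total > usable)) {
		/* In the patch, remain_actual is only computed here. */
		if (bytes > usable)
			total += actual;	/* wrap now, wait for both */
		else
			total = reserved + actual;
	}
	return total;				/* common case: untouched */
}

int main(void)
{
	/* Common case: a 64-byte packet, 32 reserved, plenty of room. */
	printf("old=%u new=%u\n",
	       wait_bytes_old(64, 32, 4096, 4104),
	       wait_bytes_new(64, 32, 4096, 4104));
	return 0;
}

Both variants compute the same totals; the win is in the shape of the
generated code for the common path, not in the values produced.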