author    Chris Wilson <chris@chris-wilson.co.uk>    2017-05-04 09:08:46 -0400
committer Chris Wilson <chris@chris-wilson.co.uk>    2017-05-04 10:40:38 -0400
commit    5e5655c32de83a0151de0c4993d7783c22b6f9b4 (patch)
tree      3436e783ba56065ce254fc3ac3c4de9582fab0be /drivers/gpu/drm/i915/intel_ringbuffer.c
parent    95aebcb2da73079f9ecb7f4e353af71ff1f04c05 (diff)
drm/i915: Micro-optimise hotpath through intel_ring_begin()
Typically, there is space available within the ring and if not we have to
wait (by definition a slow path). Rearrange the code to reduce the number
of branches and stack size for the hotpath, accommodating a slight growth
for the wait.

v2: Fix the new assert that packets are not larger than the actual ring.
v3: Make the parameters unsigned as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-3-chris@chris-wilson.co.uk
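The branch reduction hinges on folding the "need to wrap" flag into the wrap
length itself: the ring emit offset advances in whole dwords, so the low bit
of remain_actual is always clear and can carry the flag (remain_actual | 1 is
non-zero even when the tail length is zero). A minimal standalone sketch of
that trick, using hypothetical values in place of the driver's ring state:

/*
 * Standalone sketch (not driver code) of the flag-in-low-bit trick: the
 * ring write offset moves in 4-byte dwords, so the tail length always has
 * its low bit clear, leaving that bit free to mean "a wrap is required"
 * -- even when the tail length itself is zero.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	const unsigned int ring_size = 4096;	/* hypothetical ring size, dword aligned */
	const unsigned int emit = 4088;		/* hypothetical write offset */
	unsigned int remain_actual = ring_size - emit;	/* 8 tail bytes, always even */
	unsigned int need_wrap = remain_actual | 1;	/* length and flag in one word */

	if (need_wrap) {		/* one test replaces bool + separate length */
		need_wrap &= ~1u;	/* strip the flag to recover the length */
		assert(need_wrap == remain_actual);
		printf("fill %u tail bytes with MI_NOOP, then wrap to 0\n",
		       need_wrap);
	}
	return 0;
}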
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--    drivers/gpu/drm/i915/intel_ringbuffer.c    67
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b308e73fcfae..acd1da9b62a3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
+static noinline int wait_for_space(struct drm_i915_gem_request *req,
+				   unsigned int bytes)
 {
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_gem_request *target;
@@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	return 0;
 }
 
-u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
+u32 *intel_ring_begin(struct drm_i915_gem_request *req,
+		      unsigned int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->emit;
-	int remain_usable = ring->effective_size - ring->emit;
-	int bytes = num_dwords * sizeof(u32);
-	int total_bytes, wait_bytes;
-	bool need_wrap = false;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
 	u32 *cs;
 
 	total_bytes = bytes + req->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
 
-	if (unlikely(bytes > remain_usable)) {
-		/*
-		 * Not enough space for the basic request. So need to flush
-		 * out the remainder and then wait for base + reserved.
-		 */
-		wait_bytes = remain_actual + total_bytes;
-		need_wrap = true;
-	} else if (unlikely(total_bytes > remain_usable)) {
-		/*
-		 * The base request will fit but the reserved space
-		 * falls off the end. So we don't need an immediate wrap
-		 * and only need to effectively wait for the reserved
-		 * size space from the start of ringbuffer.
-		 */
-		wait_bytes = remain_actual + req->reserved_space;
-	} else {
-		/* No wrapping required, just waiting. */
-		wait_bytes = total_bytes;
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = req->reserved_space + remain_actual;
+		}
 	}
 
-	if (wait_bytes > ring->space) {
-		int ret = wait_for_space(req, wait_bytes);
+	if (unlikely(total_bytes > ring->space)) {
+		int ret = wait_for_space(req, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
 
 	if (unlikely(need_wrap)) {
-		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		memset(ring->vaddr + ring->emit, 0, need_wrap);
 		ring->emit = 0;
-		ring->space -= remain_actual;
+		ring->space -= need_wrap;
 	}
 
 	GEM_BUG_ON(ring->emit > ring->size - bytes);
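Two details of the rewrite are worth noting: remain_actual is now computed
only inside the unlikely() overflow branch, so the common case carries less
live state (the "stack size" saving mentioned in the commit message), and the
new GEM_BUG_ON(total_bytes > ring->effective_size) is the v2 assert that no
single packet plus reserve can exceed the ring. For context, a sketch of the
caller idiom this feeds in this era of the driver (a common pattern, not a
specific call site; the 4-dword MI_NOOP payload is purely illustrative):

	u32 *cs;

	cs = intel_ring_begin(req, 4);	/* reserve space for 4 dwords */
	if (IS_ERR(cs))
		return PTR_ERR(cs);	/* wait_for_space() failed: propagate */

	*cs++ = MI_NOOP;		/* illustrative payload only */
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);	/* sanity check: wrote exactly what was reserved */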