author	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-04 09:08:46 -0400
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-04 10:40:38 -0400
commit	5e5655c32de83a0151de0c4993d7783c22b6f9b4 (patch)
tree	3436e783ba56065ce254fc3ac3c4de9582fab0be /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	95aebcb2da73079f9ecb7f4e353af71ff1f04c05 (diff)
drm/i915: Micro-optimise hotpath through intel_ring_begin()
Typically, there is space available within the ring and if not we have
to wait (by definition a slow path). Rearrange the code to reduce the
number of branches and stack size for the hotpath, accommodating a slight
growth for the wait. (A sketch of the flag-packing trick this relies on
follows the tags below.)
v2: Fix the new assert that packets are not larger than the actual ring.
v3: Make the parameters unsigned as well.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-3-chris@chris-wilson.co.uk
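The micro-optimisation worth calling out is how the patch folds the old
bool need_wrap into the low bit of the wrap length itself: the ring's
emit offset only ever advances in 4-byte dwords, so the tail length
(ring->size - ring->emit) always has bit 0 clear, and "remain_actual | 1"
stays non-zero (hence truthy) even when the tail is empty. Below is a
minimal standalone sketch of that encoding, with hypothetical names; it
illustrates the trick and is not the i915 code.

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Pack "a wrap is needed" into bit 0 of the dword-aligned tail length. */
static unsigned int encode_wrap(unsigned int tail_bytes)
{
	assert((tail_bytes & 3) == 0);	/* always dword-aligned */
	return tail_bytes | 1;		/* non-zero even for an empty tail */
}

int main(void)
{
	unsigned char ring[64];
	unsigned int emit = 56;		/* current write offset in the ring */
	unsigned int need_wrap = 0;

	/* Pretend the next packet does not fit in the 8 tail bytes. */
	need_wrap = encode_wrap(sizeof(ring) - emit);

	if (need_wrap) {		/* truthy iff a wrap was requested */
		need_wrap &= ~1u;	/* recover the exact tail length */
		/* MI_NOOP encodes as 0, so a memset() noop-fills the tail. */
		memset(ring + emit, 0, need_wrap);
		emit = 0;
	}

	printf("filled %u tail bytes, emit reset to %u\n", need_wrap, emit);
	return 0;
}

Carrying the predicate and the length in one unsigned int is what lets
the rewritten function drop a bool and a local from the hot path, which
is presumably where the stack-size saving in the commit message comes
from.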
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	67
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b308e73fcfae..acd1da9b62a3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
+static noinline int wait_for_space(struct drm_i915_gem_request *req,
+				   unsigned int bytes)
 {
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_gem_request *target;
@@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	return 0;
 }
 
-u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
+u32 *intel_ring_begin(struct drm_i915_gem_request *req,
+		      unsigned int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->emit;
-	int remain_usable = ring->effective_size - ring->emit;
-	int bytes = num_dwords * sizeof(u32);
-	int total_bytes, wait_bytes;
-	bool need_wrap = false;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
 	u32 *cs;
 
 	total_bytes = bytes + req->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
 
-	if (unlikely(bytes > remain_usable)) {
-		/*
-		 * Not enough space for the basic request. So need to flush
-		 * out the remainder and then wait for base + reserved.
-		 */
-		wait_bytes = remain_actual + total_bytes;
-		need_wrap = true;
-	} else if (unlikely(total_bytes > remain_usable)) {
-		/*
-		 * The base request will fit but the reserved space
-		 * falls off the end. So we don't need an immediate wrap
-		 * and only need to effectively wait for the reserved
-		 * size space from the start of ringbuffer.
-		 */
-		wait_bytes = remain_actual + req->reserved_space;
-	} else {
-		/* No wrapping required, just waiting. */
-		wait_bytes = total_bytes;
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = req->reserved_space + remain_actual;
+		}
 	}
 
-	if (wait_bytes > ring->space) {
-		int ret = wait_for_space(req, wait_bytes);
+	if (unlikely(total_bytes > ring->space)) {
+		int ret = wait_for_space(req, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
 
 	if (unlikely(need_wrap)) {
-		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		memset(ring->vaddr + ring->emit, 0, need_wrap);
 		ring->emit = 0;
-		ring->space -= remain_actual;
+		ring->space -= need_wrap;
 	}
 
 	GEM_BUG_ON(ring->emit > ring->size - bytes);
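To make the structural change easier to see than the interleaved diff
allows, here is a condensed before/after of the wait-size computation,
with hypothetical names and the driver types stripped; this is a sketch
of the control flow only, not the driver code. Before, every call walked
an if/else-if/else chain to pick the wait size; after, the common
everything-fits case falls straight through a single predicted-not-taken
branch, and remain_actual is only computed on the rare path.

#include <stdio.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)	/* GCC/Clang builtin */

/* Before: three-way selection runs on every call. */
static unsigned int wait_bytes_old(unsigned int bytes, unsigned int reserved,
				   unsigned int usable, unsigned int actual)
{
	unsigned int total = bytes + reserved;

	if (unlikely(bytes > usable))
		return actual + total;		/* wrap now, wait for both */
	else if (unlikely(total > usable))
		return actual + reserved;	/* wait only; wrap comes later */
	else
		return total;			/* common case */
}

/* After: the common case skips the whole block with one branch. */
static unsigned int wait_bytes_new(unsigned int bytes, unsigned int reserved,
				   unsigned int usable, unsigned int actual)
{
	unsigned int total = bytes + reserved;

	if (unlikely(total > usable)) {
		/* In the patch, remain_actual is only computed here. */
		if (bytes > usable)
			total += actual;	/* wrap now, wait for both */
		else
			total = reserved + actual;
	}
	return total;				/* common case: untouched */
}

int main(void)
{
	/* Common case: a 64-byte packet, 32 reserved, plenty of room. */
	printf("old=%u new=%u\n",
	       wait_bytes_old(64, 32, 4096, 4104),
	       wait_bytes_new(64, 32, 4096, 4104));
	return 0;
}

Both variants compute the same totals; the win is in the shape of the
generated code for the common path, not in the values produced.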