aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_display.c
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2014-06-19 10:01:59 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2014-06-19 12:14:47 -0400
commitf99d70690e0755696e4e68404c8993431efda469 (patch)
tree2b5db420af053264781ce68dbb6dc37a7e4852a8 /drivers/gpu/drm/i915/intel_display.c
parentcc36513ca36a03e4fd8693d127bac5fe48185786 (diff)
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer gets invalidated (due to frontbuffer rendering) and hence should be constantly scanned out, and when it's flushed again and can be compressed/one-shot-upload. Rules for flushing are simple: The frontbuffer needs one more full upload starting from the next vblank. Which means that the flushing can _only_ be called once the frontbuffer update has been latched. But this poses a problem for pageflips: We can't just delay the flushing until the pageflip is latched, since that would pose the risk that we override frontbuffer rendering that has been scheduled in-between the pageflip ioctl and the actual latching. To handle this track asynchronous invalidations (and also pageflip) state per-ring and delay any in-between flushing until the rendering has completed. And also cancel any delayed flushing if we get a new invalidation request (whether delayed or not). Also call intel_mark_fb_busy in both cases in all cases to make sure that we keep the screen at the highest refresh rate both on flips, synchronous plane updates and for frontbuffer rendering. v2: Lots of improvements Suggestions from Chris: - Move invalidate/flush in flush_*_domain and set_to_*_domain. - Drop the flush in busy_ioctl since it's redundant. Was a leftover from an earlier concept to track flips/delayed flushes. - Don't forget about the initial modeset enable/final disable. Suggested by Chris. Track flips accurately, too. Since flips complete independently of rendering we need to track pending flips in a separate mask. Again if an invalidate happens we need to cancel the eventual flush to avoid races. v3: Provide correct header declarations for flip functions. Currently not needed outside of intel_display.c, but part of the proper interface. v4: Add proper domain management to fbcon so that the fbcon buffer is also tracked correctly. v5: Fixup locking around the fbcon set_to_gtt_domain call. 
v6: More comments from Chris: - Split out fbcon changes. - Drop superfluous checks for potential scanout before calling intel_fb functions - we can micro-optimize this later. - s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem object. We already have precedence for fb_obj in the pin_and_fence functions. v7: Clarify the semantics of the flip flush handling by renaming things a bit: - Don't go through a gem object but take the relevant frontbuffer bits directly. These functions center on the plane, the actual object is irrelevant - even a flip to the same object as already active should cause a flush. - Add a new intel_frontbuffer_flip for synchronous plane updates. It currently just calls intel_frontbuffer_flush since the implementation differs. This way we achieve a clear split between one-shot update events on one side and frontbuffer rendering with potentially a very long delay between the invalidate and flush. Chris and I also had some discussions about mark_busy and whether it is appropriate to call from flush. But mark busy is a state which should be derived from the 3 events (invalidate, flush, flip) we now have by the users, like psr does by tracking relevant information in psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for frontbuffer) needs to have similar logic. With that the overall mark_busy in the core could be removed. v8: Only when retiring gpu buffers only flush frontbuffer bits we actually invalidated in a batch. Just for safety since before any additional usage/invalidate we should always retire current rendering. Suggested by Chris Wilson. v9: Actually use intel_frontbuffer_flip in all appropriate places. Spotted by Chris. v10: Address more comments from Chris: - Don't call _flip in set_base when the crtc is inactive, avoids redundancy in the modeset case with the initial enabling of all planes. - Add comments explaining that the initial/final plane enable/disable still has work left to do before it's fully generic. 
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris. v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment. Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_display.c')
-rw-r--r--drivers/gpu/drm/i915/intel_display.c191
1 files changed, 180 insertions, 11 deletions
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9a3fa90660f4..ff6336793826 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2756,6 +2756,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
2756 2756
2757 dev_priv->display.update_primary_plane(crtc, fb, x, y); 2757 dev_priv->display.update_primary_plane(crtc, fb, x, y);
2758 2758
2759 if (intel_crtc->active)
2760 intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_PRIMARY(pipe));
2761
2759 crtc->primary->fb = fb; 2762 crtc->primary->fb = fb;
2760 crtc->x = x; 2763 crtc->x = x;
2761 crtc->y = y; 2764 crtc->y = y;
@@ -3950,6 +3953,13 @@ static void intel_crtc_enable_planes(struct drm_crtc *crtc)
3950 mutex_lock(&dev->struct_mutex); 3953 mutex_lock(&dev->struct_mutex);
3951 intel_update_fbc(dev); 3954 intel_update_fbc(dev);
3952 mutex_unlock(&dev->struct_mutex); 3955 mutex_unlock(&dev->struct_mutex);
3956
3957 /*
3958 * FIXME: Once we grow proper nuclear flip support out of this we need
3959 * to compute the mask of flip planes precisely. For the time being
3960 * consider this a flip from a NULL plane.
3961 */
3962 intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
3953} 3963}
3954 3964
3955static void intel_crtc_disable_planes(struct drm_crtc *crtc) 3965static void intel_crtc_disable_planes(struct drm_crtc *crtc)
@@ -3972,6 +3982,13 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc)
3972 intel_disable_planes(crtc); 3982 intel_disable_planes(crtc);
3973 intel_disable_primary_hw_plane(dev_priv, plane, pipe); 3983 intel_disable_primary_hw_plane(dev_priv, plane, pipe);
3974 3984
3985 /*
3986 * FIXME: Once we grow proper nuclear flip support out of this we need
3987 * to compute the mask of flip planes precisely. For the time being
3988 * consider this a flip to a NULL plane.
3989 */
3990 intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
3991
3975 drm_vblank_off(dev, pipe); 3992 drm_vblank_off(dev, pipe);
3976} 3993}
3977 3994
@@ -8212,6 +8229,8 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
8212 intel_crtc_update_cursor(crtc, intel_crtc->cursor_bo != NULL); 8229 intel_crtc_update_cursor(crtc, intel_crtc->cursor_bo != NULL);
8213 } 8230 }
8214 8231
8232 intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_CURSOR(pipe));
8233
8215 return 0; 8234 return 0;
8216fail_unpin: 8235fail_unpin:
8217 i915_gem_object_unpin_from_display_plane(obj); 8236 i915_gem_object_unpin_from_display_plane(obj);
@@ -8827,20 +8846,26 @@ out:
8827} 8846}
8828 8847
8829 8848
8830void intel_mark_fb_busy(struct drm_i915_gem_object *obj, 8849/**
8831 struct intel_engine_cs *ring) 8850 * intel_mark_fb_busy - mark given planes as busy
8851 * @dev: DRM device
8852 * @frontbuffer_bits: bits for the affected planes
8853 * @ring: optional ring for asynchronous commands
8854 *
8855 * This function gets called every time the screen contents change. It can be
8856 * used to keep e.g. the update rate at the nominal refresh rate with DRRS.
8857 */
8858static void intel_mark_fb_busy(struct drm_device *dev,
8859 unsigned frontbuffer_bits,
8860 struct intel_engine_cs *ring)
8832{ 8861{
8833 struct drm_device *dev = obj->base.dev;
8834 enum pipe pipe; 8862 enum pipe pipe;
8835 8863
8836 intel_edp_psr_exit(dev);
8837
8838 if (!i915.powersave) 8864 if (!i915.powersave)
8839 return; 8865 return;
8840 8866
8841 for_each_pipe(pipe) { 8867 for_each_pipe(pipe) {
8842 if (!(obj->frontbuffer_bits & 8868 if (!(frontbuffer_bits & INTEL_FRONTBUFFER_ALL_MASK(pipe)))
8843 INTEL_FRONTBUFFER_ALL_MASK(pipe)))
8844 continue; 8869 continue;
8845 8870
8846 intel_increase_pllclock(dev, pipe); 8871 intel_increase_pllclock(dev, pipe);
@@ -8849,6 +8874,150 @@ void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
8849 } 8874 }
8850} 8875}
8851 8876
8877/**
8878 * intel_fb_obj_invalidate - invalidate frontbuffer object
8879 * @obj: GEM object to invalidate
8880 * @ring: set for asynchronous rendering
8881 *
8882 * This function gets called every time rendering on the given object starts and
8883 * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must
8884 * be invalidated. If @ring is non-NULL any subsequent invalidation will be delayed
8885 * until the rendering completes or a flip on this frontbuffer plane is
8886 * scheduled.
8887 */
8888void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
8889 struct intel_engine_cs *ring)
8890{
8891 struct drm_device *dev = obj->base.dev;
8892 struct drm_i915_private *dev_priv = dev->dev_private;
8893
8894 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
8895
8896 if (!obj->frontbuffer_bits)
8897 return;
8898
8899 if (ring) {
8900 mutex_lock(&dev_priv->fb_tracking.lock);
8901 dev_priv->fb_tracking.busy_bits
8902 |= obj->frontbuffer_bits;
8903 dev_priv->fb_tracking.flip_bits
8904 &= ~obj->frontbuffer_bits;
8905 mutex_unlock(&dev_priv->fb_tracking.lock);
8906 }
8907
8908 intel_mark_fb_busy(dev, obj->frontbuffer_bits, ring);
8909
8910 intel_edp_psr_exit(dev);
8911}
8912
8913/**
8914 * intel_frontbuffer_flush - flush frontbuffer
8915 * @dev: DRM device
8916 * @frontbuffer_bits: frontbuffer plane tracking bits
8917 *
8918 * This function gets called every time rendering on the given planes has
8919 * completed and frontbuffer caching can be started again. Flushes will get
8920 * delayed if they're blocked by some outstanding asynchronous rendering.
8921 *
8922 * Can be called without any locks held.
8923 */
8924void intel_frontbuffer_flush(struct drm_device *dev,
8925 unsigned frontbuffer_bits)
8926{
8927 struct drm_i915_private *dev_priv = dev->dev_private;
8928
8929 /* Delay flushing when rings are still busy.*/
8930 mutex_lock(&dev_priv->fb_tracking.lock);
8931 frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
8932 mutex_unlock(&dev_priv->fb_tracking.lock);
8933
8934 intel_mark_fb_busy(dev, frontbuffer_bits, NULL);
8935
8936 intel_edp_psr_exit(dev);
8937}
8938
8939/**
8940 * intel_fb_obj_flush - flush frontbuffer object
8941 * @obj: GEM object to flush
8942 * @retire: set when retiring asynchronous rendering
8943 *
8944 * This function gets called every time rendering on the given object has
8945 * completed and frontbuffer caching can be started again. If @retire is true
8946 * then any delayed flushes will be unblocked.
8947 */
8948void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
8949 bool retire)
8950{
8951 struct drm_device *dev = obj->base.dev;
8952 struct drm_i915_private *dev_priv = dev->dev_private;
8953 unsigned frontbuffer_bits;
8954
8955 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
8956
8957 if (!obj->frontbuffer_bits)
8958 return;
8959
8960 frontbuffer_bits = obj->frontbuffer_bits;
8961
8962 if (retire) {
8963 mutex_lock(&dev_priv->fb_tracking.lock);
8964 /* Filter out new bits since rendering started. */
8965 frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
8966
8967 dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
8968 mutex_unlock(&dev_priv->fb_tracking.lock);
8969 }
8970
8971 intel_frontbuffer_flush(dev, frontbuffer_bits);
8972}
8973
8974/**
8975 * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip
8976 * @dev: DRM device
8977 * @frontbuffer_bits: frontbuffer plane tracking bits
8978 *
8979 * This function gets called after scheduling a flip on @obj. The actual
8980 * frontbuffer flushing will be delayed until completion is signalled with
8981 * intel_frontbuffer_flip_complete. If an invalidate happens in between this
8982 * flush will be cancelled.
8983 *
8984 * Can be called without any locks held.
8985 */
8986void intel_frontbuffer_flip_prepare(struct drm_device *dev,
8987 unsigned frontbuffer_bits)
8988{
8989 struct drm_i915_private *dev_priv = dev->dev_private;
8990
8991 mutex_lock(&dev_priv->fb_tracking.lock);
8992 dev_priv->fb_tracking.flip_bits
8993 |= frontbuffer_bits;
8994 mutex_unlock(&dev_priv->fb_tracking.lock);
8995}
8996
8997/**
8998 * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flush
8999 * @dev: DRM device
9000 * @frontbuffer_bits: frontbuffer plane tracking bits
9001 *
9002 * This function gets called after the flip has been latched and will complete
9003 * on the next vblank. It will execute the flush if it hasn't been cancelled yet.
9004 *
9005 * Can be called without any locks held.
9006 */
9007void intel_frontbuffer_flip_complete(struct drm_device *dev,
9008 unsigned frontbuffer_bits)
9009{
9010 struct drm_i915_private *dev_priv = dev->dev_private;
9011
9012 mutex_lock(&dev_priv->fb_tracking.lock);
9013 /* Mask any cancelled flips. */
9014 frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
9015 dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
9016 mutex_unlock(&dev_priv->fb_tracking.lock);
9017
9018 intel_frontbuffer_flush(dev, frontbuffer_bits);
9019}
9020
8852static void intel_crtc_destroy(struct drm_crtc *crtc) 9021static void intel_crtc_destroy(struct drm_crtc *crtc)
8853{ 9022{
8854 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 9023 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -8876,6 +9045,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
8876 struct intel_unpin_work *work = 9045 struct intel_unpin_work *work =
8877 container_of(__work, struct intel_unpin_work, work); 9046 container_of(__work, struct intel_unpin_work, work);
8878 struct drm_device *dev = work->crtc->dev; 9047 struct drm_device *dev = work->crtc->dev;
9048 enum pipe pipe = to_intel_crtc(work->crtc)->pipe;
8879 9049
8880 mutex_lock(&dev->struct_mutex); 9050 mutex_lock(&dev->struct_mutex);
8881 intel_unpin_fb_obj(work->old_fb_obj); 9051 intel_unpin_fb_obj(work->old_fb_obj);
@@ -8885,6 +9055,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
8885 intel_update_fbc(dev); 9055 intel_update_fbc(dev);
8886 mutex_unlock(&dev->struct_mutex); 9056 mutex_unlock(&dev->struct_mutex);
8887 9057
9058 intel_frontbuffer_flip_complete(dev, INTEL_FRONTBUFFER_PRIMARY(pipe));
9059
8888 BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0); 9060 BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
8889 atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count); 9061 atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
8890 9062
@@ -9441,9 +9613,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
9441 if (work == NULL) 9613 if (work == NULL)
9442 return -ENOMEM; 9614 return -ENOMEM;
9443 9615
9444 /* Exit PSR early in page flip */
9445 intel_edp_psr_exit(dev);
9446
9447 work->event = event; 9616 work->event = event;
9448 work->crtc = crtc; 9617 work->crtc = crtc;
9449 work->old_fb_obj = to_intel_framebuffer(old_fb)->obj; 9618 work->old_fb_obj = to_intel_framebuffer(old_fb)->obj;
@@ -9519,7 +9688,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
9519 INTEL_FRONTBUFFER_PRIMARY(pipe)); 9688 INTEL_FRONTBUFFER_PRIMARY(pipe));
9520 9689
9521 intel_disable_fbc(dev); 9690 intel_disable_fbc(dev);
9522 intel_mark_fb_busy(obj, NULL); 9691 intel_frontbuffer_flip_prepare(dev, INTEL_FRONTBUFFER_PRIMARY(pipe));
9523 mutex_unlock(&dev->struct_mutex); 9692 mutex_unlock(&dev->struct_mutex);
9524 9693
9525 trace_i915_flip_request(intel_crtc->plane, obj); 9694 trace_i915_flip_request(intel_crtc->plane, obj);