author	Chris Wilson <chris@chris-wilson.co.uk>	2012-10-09 14:24:37 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2013-01-20 07:11:16 -0500
commit	d0a57789d5ec807fc218151b2fb2de4da30fbef5 (patch)
tree	7725177c02c17a8df63ac146322b5d80411e60c7
parent	1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c (diff)
drm/i915: Only insert the mb() before updating the fence parameter
With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the barrier
in flush-fence was inserting unnecessary and expensive barriers for
never-fenced objects.

Note that removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we were
missing a barrier before the first access through the GTT. Lack of that
barrier was sufficient to cause GPU hangs.

v2: Add a couple more comments to explain the new barriers

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c	40
1 file changed, 30 insertions(+), 10 deletions(-)
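As an illustration of the barrier placement this patch describes (not the
driver's actual code), here is a minimal, compilable userspace sketch. All
*_stub names and the DOMAIN_GTT flag are hypothetical stand-ins, and
__sync_synchronize() plays the role of the kernel's mb():

/*
 * Minimal sketch of the barrier placement this patch moves into
 * i915_gem_write_fence(): one barrier before altering a fence that
 * covers an object read through the GTT, one barrier after, and no
 * barrier at all for never-fenced objects.
 */
#include <stdbool.h>
#include <stddef.h>

#define DOMAIN_GTT (1u << 1)		/* stand-in for I915_GEM_DOMAIN_GTT */

struct gem_object_stub {
	unsigned int read_domains;
};

struct fence_reg_stub {
	struct gem_object_stub *obj;	/* object currently covered, if any */
};

/* Barriers only matter for objects the CPU accesses through the GTT. */
static bool object_needs_mb(const struct gem_object_stub *obj)
{
	return obj && (obj->read_domains & DOMAIN_GTT);
}

static void hw_write_fence_stub(int reg, struct gem_object_stub *obj)
{
	(void)reg; (void)obj;		/* the real driver pokes MMIO here */
}

static void write_fence(struct fence_reg_stub *regs, int reg,
			struct gem_object_stub *obj)
{
	/* Complete all CPU accesses through the old fence before it changes. */
	if (object_needs_mb(regs[reg].obj))
		__sync_synchronize();

	hw_write_fence_stub(reg, obj);

	/* Keep the first access through the new fence from being reordered
	 * to before the fence is installed.
	 */
	if (object_needs_mb(obj))
		__sync_synchronize();

	regs[reg].obj = obj;
}

int main(void)
{
	struct fence_reg_stub regs[16] = { { NULL } };
	struct gem_object_stub obj = { .read_domains = DOMAIN_GTT };

	write_fence(regs, 0, &obj);	/* install: barrier after the write */
	write_fence(regs, 0, NULL);	/* remove: barrier before the write */
	return 0;
}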
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2ca901194824..ce706555d011 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 	POSTING_READ(FENCE_REG_830_0 + reg * 4);
 }
 
+inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+{
+	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
+}
+
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Ensure that all CPU reads are completed before installing a fence
+	 * and all writes before removing the fence.
+	 */
+	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
+		mb();
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	case 2: i830_write_fence_reg(dev, reg, obj); break;
 	default: BUG();
 	}
+
+	/* And similarly be paranoid that no direct access to this region
+	 * is reordered to before the fence is installed.
+	 */
+	if (i915_gem_object_needs_mb(obj))
+		mb();
 }
 
 static inline int fence_number(struct drm_i915_private *dev_priv,
@@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 }
 
 static int
-i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
+i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
 	if (obj->last_fenced_seqno) {
 		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
@@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
 		obj->last_fenced_seqno = 0;
 	}
 
-	/* Ensure that all CPU reads are completed before installing a fence
-	 * and all writes before removing the fence.
-	 */
-	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
-		mb();
-
 	obj->fenced_gpu_access = false;
 	return 0;
 }
@@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	ret = i915_gem_object_flush_fence(obj);
+	ret = i915_gem_object_wait_fence(obj);
 	if (ret)
 		return ret;
 
@@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 	 * will need to serialise the write to the associated fence register?
 	 */
 	if (obj->fence_dirty) {
-		ret = i915_gem_object_flush_fence(obj);
+		ret = i915_gem_object_wait_fence(obj);
 		if (ret)
 			return ret;
 	}
@@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 	if (reg->obj) {
 		struct drm_i915_gem_object *old = reg->obj;
 
-		ret = i915_gem_object_flush_fence(old);
+		ret = i915_gem_object_wait_fence(old);
 		if (ret)
 			return ret;
 
@@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	i915_gem_object_flush_cpu_write_domain(obj);
 
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
 	old_write_domain = obj->base.write_domain;
 	old_read_domains = obj->base.read_domains;
 
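The final hunk above is the barrier whose absence caused the GPU hangs: the
first CPU access through the GTT must be fenced off from earlier coherent
GPU writes. A condensed, compilable sketch of that first-access rule follows;
the stub type, the DOMAIN_GTT flag and set_to_gtt_domain_stub() are
hypothetical, with __sync_synchronize() again standing in for mb():

/* Condensed userspace sketch of the first-access barrier added to
 * i915_gem_object_set_to_gtt_domain(). All names here are hypothetical
 * stand-ins for the driver's structures.
 */
#include <stdio.h>

#define DOMAIN_GTT (1u << 1)		/* stand-in for I915_GEM_DOMAIN_GTT */

struct obj_stub {
	unsigned int read_domains;
};

static void set_to_gtt_domain_stub(struct obj_stub *obj)
{
	/* Object not yet readable through the GTT: fence off earlier
	 * coherent GPU writes from the CPU's first GTT read.
	 */
	if ((obj->read_domains & DOMAIN_GTT) == 0)
		__sync_synchronize();

	obj->read_domains |= DOMAIN_GTT;
}

int main(void)
{
	struct obj_stub obj = { .read_domains = 0 };

	set_to_gtt_domain_stub(&obj);	/* first access: barrier fires */
	set_to_gtt_domain_stub(&obj);	/* already in GTT domain: no barrier */
	printf("read_domains = %#x\n", obj.read_domains);
	return 0;
}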