diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-10-09 14:24:37 -0400 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-01-20 07:11:16 -0500 |
commit | d0a57789d5ec807fc218151b2fb2de4da30fbef5 (patch) | |
tree | 7725177c02c17a8df63ac146322b5d80411e60c7 | |
parent | 1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c (diff) |
drm/i915: Only insert the mb() before updating the fence parameter
With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the
barrier in flush-fence was inserting unnecessary and expensive barriers
for never fenced objects.
Note that removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we
were missing a barrier before the first access through the GTT. Lack of
that barrier was sufficient to cause GPU hangs.
v2: Add a couple more comments to explain the new barriers
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 40 |
1 files changed, 30 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ca901194824..ce706555d011 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
@@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, | |||
2611 | POSTING_READ(FENCE_REG_830_0 + reg * 4); | 2611 | POSTING_READ(FENCE_REG_830_0 + reg * 4); |
2612 | } | 2612 | } |
2613 | 2613 | ||
2614 | inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) | ||
2615 | { | ||
2616 | return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; | ||
2617 | } | ||
2618 | |||
2614 | static void i915_gem_write_fence(struct drm_device *dev, int reg, | 2619 | static void i915_gem_write_fence(struct drm_device *dev, int reg, |
2615 | struct drm_i915_gem_object *obj) | 2620 | struct drm_i915_gem_object *obj) |
2616 | { | 2621 | { |
2622 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
2623 | |||
2624 | /* Ensure that all CPU reads are completed before installing a fence | ||
2625 | * and all writes before removing the fence. | ||
2626 | */ | ||
2627 | if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) | ||
2628 | mb(); | ||
2629 | |||
2617 | switch (INTEL_INFO(dev)->gen) { | 2630 | switch (INTEL_INFO(dev)->gen) { |
2618 | case 7: | 2631 | case 7: |
2619 | case 6: | 2632 | case 6: |
@@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, | |||
2623 | case 2: i830_write_fence_reg(dev, reg, obj); break; | 2636 | case 2: i830_write_fence_reg(dev, reg, obj); break; |
2624 | default: BUG(); | 2637 | default: BUG(); |
2625 | } | 2638 | } |
2639 | |||
2640 | /* And similarly be paranoid that no direct access to this region | ||
2641 | * is reordered to before the fence is installed. | ||
2642 | */ | ||
2643 | if (i915_gem_object_needs_mb(obj)) | ||
2644 | mb(); | ||
2626 | } | 2645 | } |
2627 | 2646 | ||
2628 | static inline int fence_number(struct drm_i915_private *dev_priv, | 2647 | static inline int fence_number(struct drm_i915_private *dev_priv, |
@@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, | |||
2652 | } | 2671 | } |
2653 | 2672 | ||
2654 | static int | 2673 | static int |
2655 | i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) | 2674 | i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) |
2656 | { | 2675 | { |
2657 | if (obj->last_fenced_seqno) { | 2676 | if (obj->last_fenced_seqno) { |
2658 | int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); | 2677 | int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); |
@@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) | |||
2662 | obj->last_fenced_seqno = 0; | 2681 | obj->last_fenced_seqno = 0; |
2663 | } | 2682 | } |
2664 | 2683 | ||
2665 | /* Ensure that all CPU reads are completed before installing a fence | ||
2666 | * and all writes before removing the fence. | ||
2667 | */ | ||
2668 | if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) | ||
2669 | mb(); | ||
2670 | |||
2671 | obj->fenced_gpu_access = false; | 2684 | obj->fenced_gpu_access = false; |
2672 | return 0; | 2685 | return 0; |
2673 | } | 2686 | } |
@@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) | |||
2678 | struct drm_i915_private *dev_priv = obj->base.dev->dev_private; | 2691 | struct drm_i915_private *dev_priv = obj->base.dev->dev_private; |
2679 | int ret; | 2692 | int ret; |
2680 | 2693 | ||
2681 | ret = i915_gem_object_flush_fence(obj); | 2694 | ret = i915_gem_object_wait_fence(obj); |
2682 | if (ret) | 2695 | if (ret) |
2683 | return ret; | 2696 | return ret; |
2684 | 2697 | ||
@@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) | |||
2752 | * will need to serialise the write to the associated fence register? | 2765 | * will need to serialise the write to the associated fence register? |
2753 | */ | 2766 | */ |
2754 | if (obj->fence_dirty) { | 2767 | if (obj->fence_dirty) { |
2755 | ret = i915_gem_object_flush_fence(obj); | 2768 | ret = i915_gem_object_wait_fence(obj); |
2756 | if (ret) | 2769 | if (ret) |
2757 | return ret; | 2770 | return ret; |
2758 | } | 2771 | } |
@@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) | |||
2773 | if (reg->obj) { | 2786 | if (reg->obj) { |
2774 | struct drm_i915_gem_object *old = reg->obj; | 2787 | struct drm_i915_gem_object *old = reg->obj; |
2775 | 2788 | ||
2776 | ret = i915_gem_object_flush_fence(old); | 2789 | ret = i915_gem_object_wait_fence(old); |
2777 | if (ret) | 2790 | if (ret) |
2778 | return ret; | 2791 | return ret; |
2779 | 2792 | ||
@@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) | |||
3068 | 3081 | ||
3069 | i915_gem_object_flush_cpu_write_domain(obj); | 3082 | i915_gem_object_flush_cpu_write_domain(obj); |
3070 | 3083 | ||
3084 | /* Serialise direct access to this object with the barriers for | ||
3085 | * coherent writes from the GPU, by effectively invalidating the | ||
3086 | * GTT domain upon first access. | ||
3087 | */ | ||
3088 | if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) | ||
3089 | mb(); | ||
3090 | |||
3071 | old_write_domain = obj->base.write_domain; | 3091 | old_write_domain = obj->base.write_domain; |
3072 | old_read_domains = obj->base.read_domains; | 3092 | old_read_domains = obj->base.read_domains; |
3073 | 3093 | ||