aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2013-07-10 08:36:23 -0400
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2013-07-10 08:41:46 -0400
commit: d18b9619034230b6f945e215276425636ca401fe (patch)
tree: cd8abc2786852bd6c5848138424c02c208c34a6a /drivers/gpu
parent: 02978ff57a5bdfbf703d2bc5a4d933a53ede3144 (diff)
drm/i915: Fix incoherence with fence updates on Sandybridge+
This hopefully fixes the root cause behind the workaround added in

commit 25ff1195f8a0b3724541ae7bbe331b4296de9c06
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu Apr 4 21:31:03 2013 +0100

    drm/i915: Workaround incoherence between fences and LLC across multiple CPUs

Following further investigation, Jon Bloomfield realised that the 64-bit register might be broken up by the hardware into two 32-bit writes (a problem we have encountered elsewhere). This non-atomicity would then cause an issue where a second thread would see an intermediate register state (new high dword, old low dword), and this register would randomly be used in preference to its own thread register. This would cause the second thread to read from and write into a fairly random tiled location.

Breaking the operation into 3 explicit 32-bit updates (first disable the fence, poke the upper bits, then poke the lower bits and enable) ensures that, given proper serialisation between the 32-bit register write and the memory transfer, the fence value is always consistent.

Armed with this knowledge, we can explain how the previous workaround worked. The key to the corruption is that a second thread sees an erroneous fence register that conflicts and overrides its own. By serialising the fence update across all CPUs, we have a small window where no GTT access is occurring and so hide the potential corruption. This also leads to the conclusion that the earlier workaround was incomplete.

v2: Be overly paranoid about the order in which fence updates become visible to the GPU to make really sure that we turn the fence off before doing the update, and then only switch the fence on afterwards.

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Carsten Emde <C.Emde@osadl.org>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 30
1 files changed, 24 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8fd8e82ebda4..a34e8e2ba98a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2668,7 +2668,6 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
2668 drm_i915_private_t *dev_priv = dev->dev_private; 2668 drm_i915_private_t *dev_priv = dev->dev_private;
2669 int fence_reg; 2669 int fence_reg;
2670 int fence_pitch_shift; 2670 int fence_pitch_shift;
2671 uint64_t val;
2672 2671
2673 if (INTEL_INFO(dev)->gen >= 6) { 2672 if (INTEL_INFO(dev)->gen >= 6) {
2674 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2673 fence_reg = FENCE_REG_SANDYBRIDGE_0;
@@ -2678,8 +2677,23 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
2678 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2677 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
2679 } 2678 }
2680 2679
2680 fence_reg += reg * 8;
2681
2682 /* To w/a incoherency with non-atomic 64-bit register updates,
2683 * we split the 64-bit update into two 32-bit writes. In order
2684 * for a partial fence not to be evaluated between writes, we
2685 * precede the update with write to turn off the fence register,
2686 * and only enable the fence as the last step.
2687 *
2688 * For extra levels of paranoia, we make sure each step lands
2689 * before applying the next step.
2690 */
2691 I915_WRITE(fence_reg, 0);
2692 POSTING_READ(fence_reg);
2693
2681 if (obj) { 2694 if (obj) {
2682 u32 size = obj->gtt_space->size; 2695 u32 size = obj->gtt_space->size;
2696 uint64_t val;
2683 2697
2684 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2698 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2685 0xfffff000) << 32; 2699 0xfffff000) << 32;
@@ -2688,12 +2702,16 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
2688 if (obj->tiling_mode == I915_TILING_Y) 2702 if (obj->tiling_mode == I915_TILING_Y)
2689 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2703 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2690 val |= I965_FENCE_REG_VALID; 2704 val |= I965_FENCE_REG_VALID;
2691 } else
2692 val = 0;
2693 2705
2694 fence_reg += reg * 8; 2706 I915_WRITE(fence_reg + 4, val >> 32);
2695 I915_WRITE64(fence_reg, val); 2707 POSTING_READ(fence_reg + 4);
2696 POSTING_READ(fence_reg); 2708
2709 I915_WRITE(fence_reg + 0, val);
2710 POSTING_READ(fence_reg);
2711 } else {
2712 I915_WRITE(fence_reg + 4, 0);
2713 POSTING_READ(fence_reg + 4);
2714 }
2697} 2715}
2698 2716
2699static void i915_write_fence_reg(struct drm_device *dev, int reg, 2717static void i915_write_fence_reg(struct drm_device *dev, int reg,