author     Daniel Vetter <daniel.vetter@ffwll.ch>    2012-12-17 10:21:27 -0500
committer  Daniel Vetter <daniel.vetter@ffwll.ch>    2012-12-17 11:27:02 -0500
commit     b45305fce5bb1abec263fcff9d81ebecd6306ede (patch)
tree       5f8c074f2ceeb153bed7a0f28d631aec31b5eefc /drivers/gpu/drm
parent     6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f (diff)
drm/i915: Implement workaround for broken CS tlb on i830/845
Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simply _never_ exchange the physical backing storage of batch buffers, I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more.

v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround and assume full responsibility for providing coherent batches. The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet:
- Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson.
- Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
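For illustration only (not part of the patch): a minimal userspace sketch of the opt-out path described above. It queries the new I915_PARAM_HAS_PINNED_BATCHES parameter and sets I915_EXEC_IS_PINNED on execbuffer2 submissions when the application itself guarantees coherent, never-relocated batch buffers. The helper names and the libdrm-style ioctl usage are assumptions made for this sketch, not code from the patch.

/*
 * Hypothetical userspace sketch: opt out of the kernel blit workaround only
 * when the kernel advertises pinned-batch support and this process never
 * reuses or moves a batch bo the GPU may still be scanning. Assumes libdrm
 * (xf86drm.h) and the i915_drm.h definitions added alongside this series;
 * error handling is elided.
 */
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int kernel_has_pinned_batches(int fd)
{
        struct drm_i915_getparam gp;
        int value = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_HAS_PINNED_BATCHES;
        gp.value = &value;

        /* Older kernels reject unknown parameters, so treat failure as "no". */
        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                return 0;
        return value;
}

static void submit_batch(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
        /* Take responsibility for coherent batches instead of the kernel blit. */
        if (kernel_has_pinned_batches(fd))
                execbuf->flags |= I915_EXEC_IS_PINNED;

        drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}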
Diffstat (limited to 'drivers/gpu/drm')
 -rw-r--r--  drivers/gpu/drm/i915/i915_dma.c              3
 -rw-r--r--  drivers/gpu/drm/i915/i915_drv.h              4
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c   2
 -rw-r--r--  drivers/gpu/drm/i915/i915_irq.c             12
 -rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c     76
 -rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h      1
 6 files changed, 90 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8f63cd5de4b4..99daa896105d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
 		break;
+	case I915_PARAM_HAS_PINNED_BATCHES:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 062a60b381b7..1a4c3a1c111f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1100,6 +1100,7 @@ struct drm_i915_gem_object {
 	 */
 	atomic_t pending_flip;
 };
+#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
 
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
@@ -1199,6 +1200,9 @@ struct drm_i915_file_private {
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
 
+/* Early gen2 have a totally busted CS tlb and require pinned batches. */
+#define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee8f97f0539e..d6a994a07393 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 		flags |= I915_DISPATCH_SECURE;
 	}
+	if (args->flags & I915_EXEC_IS_PINNED)
+		flags |= I915_DISPATCH_PINNED;
 
 	switch (args->flags & I915_EXEC_RING_MASK) {
 	case I915_EXEC_DEFAULT:
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a4dc97f8b9f0..2220dec3e5d9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	if (!ring->get_seqno)
 		return NULL;
 
+	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
+		u32 acthd = I915_READ(ACTHD);
+
+		if (WARN_ON(ring->id != RCS))
+			return NULL;
+
+		obj = ring->private;
+		if (acthd >= obj->gtt_offset &&
+		    acthd < obj->gtt_offset + obj->base.size)
+			return i915_error_object_create(dev_priv, obj);
+	}
+
 	seqno = ring->get_seqno(ring, false);
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
 		if (obj->ring != ring)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2346b920bd86..ae253e04c391 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev))
+		drm_gem_object_unreference(to_gem_object(ring->private));
+
 	cleanup_pipe_control(ring);
 }
 
@@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 				u32 offset, u32 len,
@@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
-
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+	if (flags & I915_DISPATCH_PINNED) {
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
+
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset + len - 8);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		struct drm_i915_gem_object *obj = ring->private;
+		u32 cs_offset = obj->gtt_offset;
+
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		ret = intel_ring_begin(ring, 9+3);
+		if (ret)
+			return ret;
+		/* Blit the batch (which has now all relocs applied) to the stable batch
+		 * scratch bo area (so that the CS never stumbles over its tlb
+		 * invalidation bug) ... */
+		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+				XY_SRC_COPY_BLT_WRITE_ALPHA |
+				XY_SRC_COPY_BLT_WRITE_RGB);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+		intel_ring_emit(ring, cs_offset);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, 4096);
+		intel_ring_emit(ring, offset);
+		intel_ring_emit(ring, MI_FLUSH);
+
+		/* ... and execute it. */
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, cs_offset + len - 8);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
+
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 0, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
+	}
+
 	return intel_init_ring_buffer(dev, ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 526182ed0c6d..6af87cd05725 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -94,6 +94,7 @@ struct intel_ring_buffer {
 				       u32 offset, u32 length,
 				       unsigned flags);
 #define  I915_DISPATCH_SECURE 0x1
+#define  I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 	int		(*sync_to)(struct intel_ring_buffer *ring,
 				   struct intel_ring_buffer *to,