author    Chris Wilson <chris@chris-wilson.co.uk>    2014-09-08 09:25:41 -0400
committer Jani Nikula <jani.nikula@intel.com>        2014-09-08 09:45:03 -0400
commit    c4d69da167fa967749aeb70bc0e94a457e5d00c1 (patch)
tree      35362ec52e7b0a60940906db30e6a8b3c251e3b2 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent    4868b45de11483d5b307a406d7f1a707699b1702 (diff)
drm/i915: Evict CS TLBs between batches
Running igt, I was encountering the invalid TLB bug on my 845g, despite it using the CS workaround. Examining the w/a buffer in the error state showed that the copy from the user batch into the workaround itself was suffering from the invalid TLB bug (the first cacheline was broken, with the first two words reversed).

Time to try a fresh approach. This extends the workaround to write into each page of our scratch buffer in order to overflow the TLB and evict the invalid entries. This could be refined to only do so after we update the GTT, but for simplicity, we do it before each batch. I suspect this supersedes our current workaround, but for safety keep doing both.

v2: The magic number shall be 2.

This doesn't conclusively prove that it is the mythical TLB bug we have been trying to work around for so long; that it requires touching a number of pages to prevent the corruption indicates to me that it is TLB related, but the corruption (the reversed cacheline) is more subtle than a TLB bug, where we would expect it to read the wrong page entirely. Oh well, it prevents a reliable hang for me and so probably for others as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
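For illustration, the eviction step the patch adds boils down to six dwords emitted before every batch. A minimal sketch follows, factored into a hypothetical helper for readability; the patch actually emits these dwords inline in i830_dispatch_execbuffer, and all other names are taken from the hunk below:

        /* Hypothetical helper sketching the TLB-evicting blit.  A small
         * COLOR_BLT writes 4 bytes into each of I830_TLB_ENTRIES pages of
         * the scratch bo: with a pitch of 4096 every row lands on its own
         * page, so the CS must fetch a fresh PTE per row, pushing stale
         * entries out of the TLB as described in the commit message.
         */
        static int i830_emit_tlb_evict(struct intel_engine_cs *ring, u32 cs_offset)
        {
                int ret = intel_ring_begin(ring, 6);    /* reserve 6 dwords */
                if (ret)
                        return ret;

                intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
                intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); /* pitch = one page */
                intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* rows x bytes per row */
                intel_ring_emit(ring, cs_offset);       /* destination: scratch bo */
                intel_ring_emit(ring, 0xdeadbeef);      /* fill colour; the value is irrelevant */
                intel_ring_emit(ring, MI_NOOP);         /* pad the 5-dword blit to 6 */
                intel_ring_advance(ring);
                return 0;
        }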
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 39 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 16371a444426..2d068edd1adc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1363,54 +1363,66 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
 #define I830_BATCH_LIMIT (256*1024)
+#define I830_TLB_ENTRIES (2)
+#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
 i830_dispatch_execbuffer(struct intel_engine_cs *ring,
                          u64 offset, u32 len,
                          unsigned flags)
 {
+        u32 cs_offset = ring->scratch.gtt_offset;
         int ret;
 
-        if (flags & I915_DISPATCH_PINNED) {
-                ret = intel_ring_begin(ring, 4);
-                if (ret)
-                        return ret;
+        ret = intel_ring_begin(ring, 6);
+        if (ret)
+                return ret;
 
-                intel_ring_emit(ring, MI_BATCH_BUFFER);
-                intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-                intel_ring_emit(ring, offset + len - 8);
-                intel_ring_emit(ring, MI_NOOP);
-                intel_ring_advance(ring);
-        } else {
-                u32 cs_offset = ring->scratch.gtt_offset;
+        /* Evict the invalid PTE TLBs */
+        intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
+        intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
+        intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
+        intel_ring_emit(ring, cs_offset);
+        intel_ring_emit(ring, 0xdeadbeef);
+        intel_ring_emit(ring, MI_NOOP);
+        intel_ring_advance(ring);
 
+        if ((flags & I915_DISPATCH_PINNED) == 0) {
                 if (len > I830_BATCH_LIMIT)
                         return -ENOSPC;
 
-                ret = intel_ring_begin(ring, 9+3);
+                ret = intel_ring_begin(ring, 6 + 2);
                 if (ret)
                         return ret;
-                /* Blit the batch (which has now all relocs applied) to the stable batch
-                 * scratch bo area (so that the CS never stumbles over its tlb
-                 * invalidation bug) ... */
-                intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
-                                XY_SRC_COPY_BLT_WRITE_ALPHA |
-                                XY_SRC_COPY_BLT_WRITE_RGB);
-                intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
-                intel_ring_emit(ring, 0);
-                intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+
+                /* Blit the batch (which has now all relocs applied) to the
+                 * stable batch scratch bo area (so that the CS never
+                 * stumbles over its tlb invalidation bug) ...
+                 */
+                intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
+                intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
+                intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 1024);
                 intel_ring_emit(ring, cs_offset);
-                intel_ring_emit(ring, 0);
                 intel_ring_emit(ring, 4096);
                 intel_ring_emit(ring, offset);
+
                 intel_ring_emit(ring, MI_FLUSH);
+                intel_ring_emit(ring, MI_NOOP);
+                intel_ring_advance(ring);
 
                 /* ... and execute it. */
-                intel_ring_emit(ring, MI_BATCH_BUFFER);
-                intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-                intel_ring_emit(ring, cs_offset + len - 8);
-                intel_ring_advance(ring);
+                offset = cs_offset;
         }
 
+        ret = intel_ring_begin(ring, 4);
+        if (ret)
+                return ret;
+
+        intel_ring_emit(ring, MI_BATCH_BUFFER);
+        intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+        intel_ring_emit(ring, offset + len - 8);
+        intel_ring_emit(ring, MI_NOOP);
+        intel_ring_advance(ring);
+
         return 0;
 }
 
@@ -2200,7 +2212,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
         /* Workaround batchbuffer to combat CS tlb bug. */
         if (HAS_BROKEN_CS_TLB(dev)) {
-                obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+                obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
                 if (obj == NULL) {
                         DRM_ERROR("Failed to allocate batch bo\n");
                         return -ENOMEM;
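As a quick sanity check of the new allocation size, working only from the two macros added above, the scratch bo does not actually grow:

        /*
         * I830_TLB_ENTRIES * 4096 =   2 * 4096  =   8192 bytes
         * I830_BATCH_LIMIT         = 256 * 1024 = 262144 bytes
         * I830_WA_SIZE = max(8192, 262144)      = 262144 bytes (256 KiB)
         *
         * The allocation therefore stays at I830_BATCH_LIMIT today; the
         * max() only changes the size if I830_TLB_ENTRIES is ever raised
         * past 64 pages (64 * 4096 = 256 KiB).
         */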