path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
author     Chris Wilson <chris@chris-wilson.co.uk>  2018-12-04 09:15:16 -0500
committer  Chris Wilson <chris@chris-wilson.co.uk>  2018-12-04 10:57:08 -0500
commit     5179749925933575a67f9d8f16d0cc204f98a29f (patch)
tree       878d729abd18c05dc8f0266b4920bc378acdcaf8 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent     4d8d9fc7050106cbac8141bd5ed5db3e4abbd5fa (diff)
drm/i915: Allocate a common scratch page
Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care for coherency. As we then do not care about
its contents, we can use one page for all, reducing our allocations and
avoiding complications by not assuming per-engine isolation. For later
use, it simplifies engine initialisation (by removing the allocation
that required struct_mutex!) and means that we can always rely on there
being a scratch page.

v2: Check that we allocated a large enough scratch for I830 w/a

Fixes: 06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.18.20+
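[Editor's note] For readers following the call sites in the diff below: the patch swaps i915_ggtt_offset(rq->engine->scratch) for i915_scratch_offset(rq->i915). The helper itself is added outside this file and is not shown in this diff; what follows is a minimal sketch of what it plausibly reduces to, assuming the shared vma is the i915->gt.scratch field that the new GEM_BUG_ON in i830_emit_bb_start() dereferences.

/* Hedged sketch, not part of this diff: report the GGTT offset of the
 * single device-wide scratch vma instead of a per-engine one.
 */
static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
{
	return i915_ggtt_offset(i915->gt.scratch);
}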
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 37
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f88df5bff09..c5eb26a7ee79 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE) {
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 
@@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 		*cs++ = MI_FLUSH;
 
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
 
 	cs = intel_ring_begin(rq, 6);
@@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 static int
 gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 	int ret;
 
@@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 static int
 gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 
 	/*
@@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
@@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
 			  u64 offset, u32 len,
 			  unsigned int dispatch_flags)
 {
-	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
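[Editor's note] The new GEM_BUG_ON in this hunk is the "v2" change from the commit message: I830_WA_SIZE works out to 256 KiB, so on gen2 parts with the broken CS TLB the common scratch object must be at least that large rather than the usual single page. A standalone restatement of that arithmetic, purely illustrative and not kernel code:

#include <assert.h>
#include <stdio.h>

/* Mirrors the macros from the hunk above, outside the kernel tree. */
#define I830_TLB_ENTRIES 2
#define I830_BATCH_LIMIT (256 * 1024)   /* SZ_256K */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define I830_WA_SIZE MAX(I830_TLB_ENTRIES * 4096, I830_BATCH_LIMIT)

int main(void)
{
	/* max(8 KiB, 256 KiB) == 256 KiB: the gen2 w/a batch copy needs a
	 * 256 KiB scratch area, which is the bound the GEM_BUG_ON enforces. */
	static_assert(I830_WA_SIZE == 256 * 1024, "I830 w/a area is 256 KiB");
	printf("common scratch must be >= %d bytes when HAS_BROKEN_CS_TLB\n",
	       I830_WA_SIZE);
	return 0;
}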
@@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
 	struct intel_ring *ring;
-	unsigned int size;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
-	size = PAGE_SIZE;
-	if (HAS_BROKEN_CS_TLB(engine->i915))
-		size = I830_WA_SIZE;
-	err = intel_engine_create_scratch(engine, size);
-	if (err)
-		goto err_unpin;
-
 	err = intel_engine_init_common(engine);
 	if (err)
-		goto err_scratch;
+		goto err_unpin;
 
 	return 0;
 
-err_scratch:
-	intel_engine_cleanup_scratch(engine);
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
@@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
 	/* Stall until the page table load is complete */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-	*cs++ = i915_ggtt_offset(engine->scratch);
+	*cs++ = i915_scratch_offset(rq->i915);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		/* Insert a delay before the next switch! */
 		*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 		*cs++ = i915_mmio_reg_offset(last_reg);
-		*cs++ = i915_ggtt_offset(engine->scratch);
+		*cs++ = i915_scratch_offset(rq->i915);
 		*cs++ = MI_NOOP;
 	}
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;