path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
author     Chris Wilson <chris@chris-wilson.co.uk>  2018-12-04 09:15:16 -0500
committer  Chris Wilson <chris@chris-wilson.co.uk>  2018-12-04 10:57:08 -0500
commit     5179749925933575a67f9d8f16d0cc204f98a29f (patch)
tree       878d729abd18c05dc8f0266b4920bc378acdcaf8 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent     4d8d9fc7050106cbac8141bd5ed5db3e4abbd5fa (diff)
drm/i915: Allocate a common scratch page
Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care for coherency. As we then do not care about
its contents, we can use one page for all, reducing our allocations and
avoiding complications by not assuming per-engine isolation. For later
use, it simplifies engine initialisation (by removing the allocation
that required struct_mutex!) and means that we can always rely on there
being a scratch page.

v2: Check that we allocated a large enough scratch for I830 w/a

Fixes: 06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.18.20+
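[Editor's note] For readers following the call sites in the diff below: the patch swaps i915_ggtt_offset(rq->engine->scratch) for i915_scratch_offset(rq->i915). The helper itself is added outside this file and is not shown in this diff; what follows is a minimal sketch of what it plausibly reduces to, assuming the shared vma is the i915->gt.scratch field that the new GEM_BUG_ON in i830_emit_bb_start() dereferences.

/* Hedged sketch, not part of this diff: report the GGTT offset of the
 * single device-wide scratch vma instead of a per-engine one.
 */
static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
{
	return i915_ggtt_offset(i915->gt.scratch);
}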
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 37
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f88df5bff09..c5eb26a7ee79 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE) {
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 
@@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 		*cs++ = MI_FLUSH;
 
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
 
 	cs = intel_ring_begin(rq, 6);
@@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 static int
 gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 	int ret;
 
@@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 static int
 gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 
 	/*
@@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
@@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
 			  u64 offset, u32 len,
 			  unsigned int dispatch_flags)
 {
-	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
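[Editor's note] The new GEM_BUG_ON in this hunk is the "v2" change from the commit message: I830_WA_SIZE works out to 256 KiB, so on gen2 parts with the broken CS TLB the common scratch object must be at least that large rather than the usual single page. A standalone restatement of that arithmetic, purely illustrative and not kernel code:

#include <assert.h>
#include <stdio.h>

/* Mirrors the macros from the hunk above, outside the kernel tree. */
#define I830_TLB_ENTRIES 2
#define I830_BATCH_LIMIT (256 * 1024)   /* SZ_256K */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define I830_WA_SIZE MAX(I830_TLB_ENTRIES * 4096, I830_BATCH_LIMIT)

int main(void)
{
	/* max(8 KiB, 256 KiB) == 256 KiB: the gen2 w/a batch copy needs a
	 * 256 KiB scratch area, which is the bound the GEM_BUG_ON enforces. */
	static_assert(I830_WA_SIZE == 256 * 1024, "I830 w/a area is 256 KiB");
	printf("common scratch must be >= %d bytes when HAS_BROKEN_CS_TLB\n",
	       I830_WA_SIZE);
	return 0;
}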
@@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
 	struct intel_ring *ring;
-	unsigned int size;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
-	size = PAGE_SIZE;
-	if (HAS_BROKEN_CS_TLB(engine->i915))
-		size = I830_WA_SIZE;
-	err = intel_engine_create_scratch(engine, size);
-	if (err)
-		goto err_unpin;
-
 	err = intel_engine_init_common(engine);
 	if (err)
-		goto err_scratch;
+		goto err_unpin;
 
 	return 0;
 
-err_scratch:
-	intel_engine_cleanup_scratch(engine);
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
@@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
 	/* Stall until the page table load is complete */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-	*cs++ = i915_ggtt_offset(engine->scratch);
+	*cs++ = i915_scratch_offset(rq->i915);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		/* Insert a delay before the next switch! */
 		*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 		*cs++ = i915_mmio_reg_offset(last_reg);
-		*cs++ = i915_ggtt_offset(engine->scratch);
+		*cs++ = i915_scratch_offset(rq->i915);
 		*cs++ = MI_NOOP;
 	}
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;