Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')

 drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6a978ce80244..5c6080fd0968 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -841,11 +841,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
         if (unlikely(total_bytes > remain_usable)) {
                 /*
                  * The base request will fit but the reserved space
-                 * falls off the end. So only need to to wait for the
-                 * reserved size after flushing out the remainder.
+                 * falls off the end. So don't need an immediate wrap
+                 * and only need to effectively wait for the reserved
+                 * size space from the start of ringbuffer.
                  */
                 wait_bytes = remain_actual + ringbuf->reserved_size;
-                need_wrap = true;
         } else if (total_bytes > ringbuf->space) {
                 /* No wrapping required, just waiting. */
                 wait_bytes = total_bytes;
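
[Illustrative aside, not part of the patch: a minimal, self-contained sketch of the post-change decision in this hunk. struct ringbuf_state, pick_wait and the numbers in main are placeholders rather than the driver's API, and only the two branches visible above are modelled; the point is that the "reserved space falls off the end" case now only waits and no longer forces an immediate wrap.]

#include <stdbool.h>
#include <stdio.h>

/* Stub of the ringbuffer fields referenced in the hunk above. */
struct ringbuf_state {
        int space;              /* bytes currently free in the ring */
        int reserved_size;      /* bytes kept back for the request tail */
};

/*
 * Mirrors only the two branches shown in the hunk, post-change: if the
 * request plus reserved space falls off the usable end, wait for the tail
 * to drain plus the reserved size, without setting the removed
 * "need_wrap = true".
 */
static int pick_wait(const struct ringbuf_state *rb, int bytes,
                     int remain_usable, int remain_actual, bool *need_wrap)
{
        int total_bytes = bytes + rb->reserved_size;
        int wait_bytes = 0;

        *need_wrap = false;
        if (total_bytes > remain_usable)
                wait_bytes = remain_actual + rb->reserved_size;
        else if (total_bytes > rb->space)
                wait_bytes = total_bytes;       /* no wrap, just wait */

        return wait_bytes;
}

int main(void)
{
        struct ringbuf_state rb = { .space = 256, .reserved_size = 160 };
        bool need_wrap;
        int wait = pick_wait(&rb, 128, 192, 512, &need_wrap);

        printf("wait_bytes=%d need_wrap=%d\n", wait, need_wrap);
        return 0;
}
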
@@ -1913,15 +1913,18 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
         struct intel_ringbuffer *ringbuf = request->ringbuf;
         int ret;
 
-        ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
+        ret = intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS);
         if (ret)
                 return ret;
 
+        /* We're using qword write, seqno should be aligned to 8 bytes. */
+        BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
+
         /* w/a for post sync ops following a GPGPU operation we
          * need a prior CS_STALL, which is emitted by the flush
          * following the batch.
          */
-        intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5));
+        intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
         intel_logical_ring_emit(ringbuf,
                                 (PIPE_CONTROL_GLOBAL_GTT_IVB |
                                  PIPE_CONTROL_CS_STALL |
@@ -1929,7 +1932,10 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
         intel_logical_ring_emit(ringbuf, hws_seqno_address(request->ring));
         intel_logical_ring_emit(ringbuf, 0);
         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
+        /* We're thrashing one dword of HWS. */
+        intel_logical_ring_emit(ringbuf, 0);
         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+        intel_logical_ring_emit(ringbuf, MI_NOOP);
         return intel_logical_ring_advance_and_submit(request);
 }
 
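
[Illustrative aside, not part of the patch: the new dword budget of the request tail can be sanity-checked with a tiny standalone counter. The SKETCH_* values are symbolic stand-ins rather than real opcodes, the PIPE_CONTROL flag list that the diff truncates is folded into a single placeholder, and the WA_TAIL_DWORDS padding emitted elsewhere is not modelled; only the count and ordering mirror the hunks above.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Symbolic stand-ins; only the count and ordering mirror the hunks above. */
enum {
        SKETCH_PIPE_CONTROL = 1,        /* GFX_OP_PIPE_CONTROL(6), was (5) */
        SKETCH_FLAGS,                   /* GLOBAL_GTT_IVB | CS_STALL | ... (truncated in the diff) */
        SKETCH_SEQNO_ADDR,              /* hws_seqno_address(request->ring) */
        SKETCH_ZERO,
        SKETCH_SEQNO,                   /* i915_gem_request_get_seqno(request) */
        SKETCH_MI_USER_INTERRUPT,
        SKETCH_MI_NOOP,
};

int main(void)
{
        uint32_t ring[8];
        unsigned int n = 0;

        ring[n++] = SKETCH_PIPE_CONTROL;
        ring[n++] = SKETCH_FLAGS;
        ring[n++] = SKETCH_SEQNO_ADDR;
        ring[n++] = SKETCH_ZERO;                /* zero dword following the address */
        ring[n++] = SKETCH_SEQNO;
        ring[n++] = SKETCH_ZERO;                /* "thrashing one dword of HWS" */
        ring[n++] = SKETCH_MI_USER_INTERRUPT;
        ring[n++] = SKETCH_MI_NOOP;             /* keeps the emitted tail an even dword count */

        /* Matches the new intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS). */
        assert(n == 8);
        printf("emitted %u dwords before WA_TAIL_DWORDS\n", n);
        return 0;
}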