drm/i915: Wait for pending flips on the GPU

Currently, if a batch buffer refers to an object with a pending flip, then we sleep until that pending flip is completed (unpinned and signalled). This is so that a flip can be queued and the user can continue rendering to the backbuffer, oblivious to whether the buffer is still pinned as the scanout. (The kernel arbitrates at the last moment, stalling the batch and waiting until the buffer is unpinned and replaced as the front buffer.) As we only have a queue depth of 1, we can simply wait for the current pending flip to complete and continue rendering. We can achieve this with a single WAIT_FOR_EVENT command inserted into the ring buffer prior to executing the batch, *without* stalling the client.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Chris Wilson <chris@chris-wilson.co.uk> 2010-10-07 12:28:15 -0400
committer: Chris Wilson <chris@chris-wilson.co.uk> 2010-10-07 14:10:09 -0400
commit: e59f2bac15042eb744851bcf866f18dadc3091c6 (patch)
tree: a0534aea9beab6f65489d171e6dff2b3c992c13d
parent: c2873e9633fe908dccd36dbb1d370e9c59a1ca62 (diff)
2 files changed, 34 insertions, 48 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 100a7537980..72ab3032300 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3647,41 +3647,6 @@ i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
 }
 static int
-i915_gem_wait_for_pending_flip(struct drm_device *dev,
-                               struct drm_gem_object **object_list,
-                               int count)
-{
-        drm_i915_private_t *dev_priv = dev->dev_private;
-        struct drm_i915_gem_object *obj_priv;
-        DEFINE_WAIT(wait);
-        int i, ret = 0;
-        for (;;) {
-                prepare_to_wait(&dev_priv->pending_flip_queue,
-                                &wait, TASK_INTERRUPTIBLE);
-                for (i = 0; i < count; i++) {
-                        obj_priv = to_intel_bo(object_list[i]);
-                        if (atomic_read(&obj_priv->pending_flip) > 0)
-                                break;
-                }
-                if (i == count)
-                        break;
-                if (!signal_pending(current)) {
-                        mutex_unlock(&dev->struct_mutex);
-                        schedule();
-                        mutex_lock(&dev->struct_mutex);
-                        continue;
-                }
-                ret = -ERESTARTSYS;
-                break;
-        }
-        finish_wait(&dev_priv->pending_flip_queue, &wait);
-        return ret;
-}
-static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                       struct drm_file *file_priv,
                       struct drm_i915_gem_execbuffer2 *args,
@@ -3773,7 +3738,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        }
        /* Look up object handles */
-        flips = 0;
        for (i = 0; i < args->buffer_count; i++) {
                object_list[i] = drm_gem_object_lookup(dev, file_priv,
                                                       exec_list[i].handle);
@@ -3796,14 +3760,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                        goto err;
                }
                obj_priv->in_execbuffer = true;
-                flips += atomic_read(&obj_priv->pending_flip);
-        }
-        if (flips > 0) {
-                ret = i915_gem_wait_for_pending_flip(dev, object_list,
-                                                     args->buffer_count);
-                if (ret)
-                        goto err;
        }
        /* Pin and relocate */
@@ -3943,9 +3899,38 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                              ~0);
 #endif
+        /* Check for any pending flips. As we only maintain a flip queue depth
+         * of 1, we can simply insert a WAIT for the next display flip prior
+         * to executing the batch and avoid stalling the CPU.
+         */
+        flips = 0;
+        for (i = 0; i < args->buffer_count; i++) {
+                if (object_list[i]->write_domain)
+                        flips |= atomic_read(&to_intel_bo(object_list[i])->pending_flip);
+        }
+        if (flips) {
+                int plane, flip_mask;
+                for (plane = 0; flips >> plane; plane++) {
+                        if (((flips >> plane) & 1) == 0)
+                                continue;
+                        if (plane)
+                                flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+                        else
+                                flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+                        intel_ring_begin(dev, ring, 2);
+                        intel_ring_emit(dev, ring,
+                                        MI_WAIT_FOR_EVENT | flip_mask);
+                        intel_ring_emit(dev, ring, MI_NOOP);
+                        intel_ring_advance(dev, ring);
+                }
+        }
        /* Exec the batchbuffer */
        ret = ring->dispatch_gem_execbuffer(dev, ring, args,
-                        cliprects, exec_offset);
+                                            cliprects, exec_offset);
        if (ret) {
                DRM_ERROR("dispatch failed %d\n", ret);
                goto err;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9109c00f3ea..7fe92d06eb2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4994,8 +4994,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
        obj_priv = to_intel_bo(work->pending_flip_obj);
        /* Initial scanout buffer will have a 0 pending flip count */
-        if ((atomic_read(&obj_priv->pending_flip) == 0) ||
+        atomic_clear_mask(1 << intel_crtc->plane,
-            atomic_dec_and_test(&obj_priv->pending_flip))
+                          &obj_priv->pending_flip.counter);
+        if (atomic_read(&obj_priv->pending_flip) == 0)
                wake_up(&dev_priv->pending_flip_queue);
        schedule_work(&work->work);
@@ -5092,7 +5093,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                goto cleanup_objs;
        obj_priv = to_intel_bo(obj);
-        atomic_inc(&obj_priv->pending_flip);
+        atomic_add(1 << intel_crtc->plane, &obj_priv->pending_flip);
        work->pending_flip_obj = obj;
        if (IS_GEN3(dev) || IS_GEN2(dev)) {
author	Chris Wilson <chris@chris-wilson.co.uk>	2010-10-07 12:28:15 -0400
committer	Chris Wilson <chris@chris-wilson.co.uk>	2010-10-07 14:10:09 -0400
commit	e59f2bac15042eb744851bcf866f18dadc3091c6 (patch)
tree	a0534aea9beab6f65489d171e6dff2b3c992c13d
parent	c2873e9633fe908dccd36dbb1d370e9c59a1ca62 (diff)