Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 123
 1 file changed, 94 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91bb1fc27420..f56af0aaafde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
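Two details of the new spin loop are worth spelling out. First, local_clock() >> 10 trades a divide for a shift: it divides nanoseconds by 1024 rather than 1000, so each reported "microsecond" is really 1.024us and the 5us budget comes out to roughly 5.12us of wall time, which is easily accurate enough for this heuristic. Second, the loop now gives up for any of four reasons: the scheduler wants the CPU (need_resched), a signal is pending in the chosen wait state, the budget is spent, or the task migrated to another CPU (which both invalidates the local_clock() comparison and hints at load). A minimal standalone sketch of that policy follows; struct gpu_request, now_us() and request_completed() are hypothetical stand-ins for the i915 code above, while need_resched(), signal_pending_state(), time_after() and cpu_relax() are the usual kernel helpers.

/* Sketch only, assuming kernel context -- not the patch's code itself. */
static int spin_for_request(struct gpu_request *rq, int state)
{
	unsigned int cpu, this_cpu;
	unsigned long timeout = now_us(&cpu) + 5;	/* ~5us budget */

	while (!need_resched()) {
		if (request_completed(rq))
			return 0;			/* done without touching irqs */

		if (signal_pending_state(state, current))
			break;				/* let the caller handle the signal */

		if (time_after(now_us(&this_cpu), timeout) || this_cpu != cpu)
			break;				/* budget spent, or we migrated */

		cpu_relax();
	}

	return -EAGAIN;					/* fall back to the irq wait */
}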
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1210,8 +1262,16 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = timeout ?
-		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+	timeout_expire = 0;
+	if (timeout) {
+		if (WARN_ON(*timeout < 0))
+			return -EINVAL;
+
+		if (*timeout == 0)
+			return -ETIME;
+
+		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 6)
 		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
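The reworked timeout handling makes the degenerate cases explicit rather than funnelling them through the jiffies conversion. The summary below is inferred from the surrounding wait loop (the completion check above runs first; the loop returns -ETIME once timeout_expire passes), not stated in the patch itself:

/*
 * Caller-visible behaviour of the timeout argument after this hunk
 * (an inference from the surrounding code, not part of the patch):
 *
 *   *timeout < 0    ->  WARN_ON + -EINVAL, no wait attempted
 *   *timeout == 0   ->  -ETIME unless the request already completed
 *                       (the earlier completion check wins), i.e. a poll
 *   *timeout > 0    ->  bounded wait; -ETIME once jiffies passes
 *                       timeout_expire, remaining time written back
 *                       through *timeout on the way out
 *   timeout == NULL ->  unbounded wait, as before
 */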
@@ -1221,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1233,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1252,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
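Replacing the open-coded `interruptible && signal_pending(current)` with `signal_pending_state(state, current)` keeps the signal check tied to the same state that is now passed to prepare_to_wait() and to the spin loop. For context, the scheduler helper behaves roughly as follows; this is a paraphrase of the core kernel code, not part of this patch:

/* Roughly what signal_pending_state() does (see include/linux/sched.h). */
static inline int signal_pending_state(long state, struct task_struct *p)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return 0;			/* uninterruptible: never abort */
	if (!signal_pending(p))
		return 0;			/* no signal queued */

	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}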
@@ -2546,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
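Recording the previously submitted seqno is what makes the i915_gem_request_started() check in the spin path possible: a request has started once the GPU has advanced past the request submitted just before it, and only then is busywaiting worthwhile. The helper itself lives in a header rather than this file, so the sketch below is an assumption about its shape, not a quote from the patch:

/* Plausible sketch of the started-check enabled by previous_seqno. */
static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
					    bool lazy_coherency)
{
	/* The GPU has begun this request once it has passed the seqno of
	 * the request submitted immediately before it.
	 */
	return i915_seqno_passed(req->ring->get_seqno(req->ring, lazy_coherency),
				 req->previous_seqno);
}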
@@ -4072,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
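The new helper restates the test that used to be open coded in i915_gem_object_do_pin() (removed in the next hunk) so that other binding paths can reuse it. Worked through with made-up numbers to show what the two predicates mean; the values below are illustrative assumptions, not taken from the patch:

/* Illustrative numbers only: a tiled object with fence_size = 1 MiB and
 * fence_alignment = 1 MiB, bound at vma->node.start = 0x300000 with
 * vma->node.size = 1 MiB, on a GTT whose mappable_end = 256 MiB:
 *
 *   fenceable: node.size == fence_size
 *              && (0x300000 & (0x100000 - 1)) == 0          -> true
 *   mappable:  0x300000 + 0x100000 <= 0x10000000             -> true
 *
 * so obj->map_and_fenceable becomes true; binding the VMA unaligned, or
 * above the mappable aperture, would clear it.
 */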
@@ -4139,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 