Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c'):
 drivers/gpu/drm/i915/i915_gem.c | 123 ++++++++++++++++++++++++++++++---------
 1 file changed, 94 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91bb1fc27420..f56af0aaafde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
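
Note: the hunk above replaces the jiffy-granular spin limit (up to one scheduler tick, i.e. as much as 10ms at HZ=100) with a budget of roughly 5us measured against local_clock(), and additionally abandons the spin if the waiter migrates to another CPU (a hint of load) or has a signal pending. local_clock() returns nanoseconds; the >> 10 divides by 1024 rather than 1000, so it undercounts microseconds by about 2.4% and the 5-unit budget is really about 5.12us of wall time, which is fine for a heuristic cutoff. Below is a minimal user-space sketch of the same bounded-spin pattern, not the kernel code: clock_gettime() and sched_getcpu() stand in for local_clock()/get_cpu(), and the hypothetical `done` flag stands in for i915_gem_request_completed().

#define _GNU_SOURCE
#include <sched.h>
#include <stdbool.h>
#include <time.h>

static volatile int done;	/* hypothetical completion flag, set elsewhere */

static unsigned long clock_us(int *cpu)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	*cpu = sched_getcpu();
	/* ns -> ~us by truncation: >> 10 divides by 1024, not 1000 */
	return ((unsigned long)ts.tv_sec * 1000000000ul + ts.tv_nsec) >> 10;
}

/* Spin for at most ~5us; bail out early on CPU migration. */
static bool spin_wait(void)
{
	int cpu, this_cpu;
	unsigned long timeout = clock_us(&cpu) + 5;

	do {
		if (done)
			return true;	/* completed within the budget */
		if (clock_us(&this_cpu) > timeout || this_cpu != cpu)
			return false;	/* budget spent or migrated: sleep instead */
	} while (1);
}
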
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1210,8 +1262,16 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = timeout ?
-		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+	timeout_expire = 0;
+	if (timeout) {
+		if (WARN_ON(*timeout < 0))
+			return -EINVAL;
+
+		if (*timeout == 0)
+			return -ETIME;
+
+		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 6)
 		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
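
Note: the timeout handling above tightens the contract of __i915_wait_request(): a negative *timeout is now rejected with -EINVAL under WARN_ON, a zero *timeout returns -ETIME immediately (the request is already known to be incomplete from the check a few lines earlier), and only a positive value is converted to jiffies; timeout_expire == 0 is reserved for "wait indefinitely". Assuming the usual wait-ioctl semantics built on top of this path (struct drm_i915_gem_wait and DRM_IOCTL_I915_GEM_WAIT are real, but treating timeout_ns == 0 as a pure busy-query is an assumption of this sketch), user space can poll a buffer without sleeping:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* struct drm_i915_gem_wait, DRM_IOCTL_I915_GEM_WAIT */

/* Returns true once all rendering to the buffer object has completed. */
static bool bo_is_idle(int drm_fd, uint32_t handle)
{
	struct drm_i915_gem_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.bo_handle = handle;
	wait.timeout_ns = 0;	/* do not sleep, just query completion */

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait) == 0)
		return true;

	/* -ETIME here means the GPU is still processing the buffer. */
	return false;
}
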
@@ -1221,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1233,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1252,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
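
Note: with the task state computed once (the hunk at -1197 above), both the busy-wait and the sleeping wait can use signal_pending_state(state, current) instead of open-coding "interruptible && signal_pending(current)": an uninterruptible wait ignores signals, an interruptible wait breaks on any pending signal, and a killable wait (not used here) would break only on a fatal one. A self-contained sketch of that decision table follows; the state values are stand-ins, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel's task state bits (values are illustrative). */
#define TASK_INTERRUPTIBLE	0x0001
#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_WAKEKILL		0x0100
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)

/* Mirrors the decision signal_pending_state() makes for a given wait state. */
static bool breaks_on_signal(int state, bool any_signal, bool fatal_signal)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return false;	/* uninterruptible: signals never break the wait */
	if (!any_signal)
		return false;
	return (state & TASK_INTERRUPTIBLE) || fatal_signal;
}

int main(void)
{
	printf("%d\n", breaks_on_signal(TASK_UNINTERRUPTIBLE, true, true));	/* 0 */
	printf("%d\n", breaks_on_signal(TASK_INTERRUPTIBLE, true, false));	/* 1 */
	printf("%d\n", breaks_on_signal(TASK_KILLABLE, true, false));		/* 0 */
	return 0;
}
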
@@ -2546,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
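
Note: recording previous_seqno at submission is what makes the "only spin if the GPU is already processing this request" check in the first hunk possible: a request has started once every request submitted before it has been retired, and has completed once its own seqno has been retired, both tested with a wrap-safe seqno comparison. A sketch of that idea with illustrative names (hwsp_seqno stands for whatever seqno the hardware status page last reported):

#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe "a is at or after b" for 32-bit seqnos. */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

static bool request_started(uint32_t hwsp_seqno, uint32_t previous_seqno)
{
	return seqno_passed(hwsp_seqno, previous_seqno);
}

static bool request_completed(uint32_t hwsp_seqno, uint32_t seqno)
{
	return seqno_passed(hwsp_seqno, seqno);
}
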
@@ -4072,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
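
Note: the helper extracted above encodes two independent conditions: the VMA is fenceable only if its GTT node is exactly the fence size for the object's size/tiling and starts on a fence-aligned offset, and it is mappable only if the node (padded to the fence size) ends inside the CPU-visible part of the global GTT aperture; map_and_fenceable is the conjunction of the two. A worked example with made-up numbers (the real values come from i915_gem_get_gtt_size()/_alignment() and the probed aperture):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAPPABLE_END	(64ull << 20)	/* illustrative 64 MiB CPU-visible window */

static bool map_and_fenceable(uint64_t node_start, uint64_t node_size,
			      uint64_t fence_size, uint64_t fence_alignment)
{
	bool fenceable = node_size == fence_size &&
			 (node_start & (fence_alignment - 1)) == 0;
	bool mappable = node_start + fence_size <= MAPPABLE_END;

	return fenceable && mappable;
}

int main(void)
{
	/* 1 MiB fence-sized node bound at 3 MiB: aligned and within the window. */
	printf("%d\n", map_and_fenceable(3ull << 20, 1ull << 20,
					 1ull << 20, 1ull << 20));
	/* Same node bound at 65 MiB: beyond the mappable window, so false. */
	printf("%d\n", map_and_fenceable(65ull << 20, 1ull << 20,
					 1ull << 20, 1ull << 20));
	return 0;
}
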
@@ -4139,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 