Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 123
 1 file changed, 94 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91bb1fc27420..f56af0aaafde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
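Two details of the new spin loop are worth spelling out. First, local_clock() >> 10 trades a divide for a shift: it divides nanoseconds by 1024 rather than 1000, so each reported "microsecond" is really 1.024us and the 5us budget comes out to roughly 5.12us of wall time, which is easily accurate enough for this heuristic. Second, the loop now gives up for any of four reasons: the scheduler wants the CPU (need_resched), a signal is pending in the chosen wait state, the budget is spent, or the task migrated to another CPU (which both invalidates the local_clock() comparison and hints at load). A minimal standalone sketch of that policy follows; struct gpu_request, now_us() and request_completed() are hypothetical stand-ins for the i915 code above, while need_resched(), signal_pending_state(), time_after() and cpu_relax() are the usual kernel helpers.

/* Sketch only, assuming kernel context -- not the patch's code itself. */
static int spin_for_request(struct gpu_request *rq, int state)
{
	unsigned int cpu, this_cpu;
	unsigned long timeout = now_us(&cpu) + 5;	/* ~5us budget */

	while (!need_resched()) {
		if (request_completed(rq))
			return 0;			/* done without touching irqs */

		if (signal_pending_state(state, current))
			break;				/* let the caller handle the signal */

		if (time_after(now_us(&this_cpu), timeout) || this_cpu != cpu)
			break;				/* budget spent, or we migrated */

		cpu_relax();
	}

	return -EAGAIN;					/* fall back to the irq wait */
}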
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1210,8 +1262,16 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = timeout ?
-		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+	timeout_expire = 0;
+	if (timeout) {
+		if (WARN_ON(*timeout < 0))
+			return -EINVAL;
+
+		if (*timeout == 0)
+			return -ETIME;
+
+		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 6)
 		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
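The reworked timeout handling makes the degenerate cases explicit rather than funnelling them through the jiffies conversion. The summary below is inferred from the surrounding wait loop (the completion check above runs first; the loop returns -ETIME once timeout_expire passes), not stated in the patch itself:

/*
 * Caller-visible behaviour of the timeout argument after this hunk
 * (an inference from the surrounding code, not part of the patch):
 *
 *   *timeout < 0    ->  WARN_ON + -EINVAL, no wait attempted
 *   *timeout == 0   ->  -ETIME unless the request already completed
 *                       (the earlier completion check wins), i.e. a poll
 *   *timeout > 0    ->  bounded wait; -ETIME once jiffies passes
 *                       timeout_expire, remaining time written back
 *                       through *timeout on the way out
 *   timeout == NULL ->  unbounded wait, as before
 */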
@@ -1221,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1233,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1252,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
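Replacing the open-coded `interruptible && signal_pending(current)` with `signal_pending_state(state, current)` keeps the signal check tied to the same state that is now passed to prepare_to_wait() and to the spin loop. For context, the scheduler helper behaves roughly as follows; this is a paraphrase of the core kernel code, not part of this patch:

/* Roughly what signal_pending_state() does (see include/linux/sched.h). */
static inline int signal_pending_state(long state, struct task_struct *p)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return 0;			/* uninterruptible: never abort */
	if (!signal_pending(p))
		return 0;			/* no signal queued */

	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}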
@@ -2546,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
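Recording the previously submitted seqno is what makes the i915_gem_request_started() check in the spin path possible: a request has started once the GPU has advanced past the request submitted just before it, and only then is busywaiting worthwhile. The helper itself lives in a header rather than this file, so the sketch below is an assumption about its shape, not a quote from the patch:

/* Plausible sketch of the started-check enabled by previous_seqno. */
static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
					    bool lazy_coherency)
{
	/* The GPU has begun this request once it has passed the seqno of
	 * the request submitted immediately before it.
	 */
	return i915_seqno_passed(req->ring->get_seqno(req->ring, lazy_coherency),
				 req->previous_seqno);
}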
@@ -4072,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
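The new helper restates the test that used to be open coded in i915_gem_object_do_pin() (removed in the next hunk) so that other binding paths can reuse it. Worked through with made-up numbers to show what the two predicates mean; the values below are illustrative assumptions, not taken from the patch:

/* Illustrative numbers only: a tiled object with fence_size = 1 MiB and
 * fence_alignment = 1 MiB, bound at vma->node.start = 0x300000 with
 * vma->node.size = 1 MiB, on a GTT whose mappable_end = 256 MiB:
 *
 *   fenceable: node.size == fence_size
 *              && (0x300000 & (0x100000 - 1)) == 0          -> true
 *   mappable:  0x300000 + 0x100000 <= 0x10000000             -> true
 *
 * so obj->map_and_fenceable becomes true; binding the VMA unaligned, or
 * above the mappable aperture, would clear it.
 */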
@@ -4139,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 