Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 328
1 file changed, 87 insertions, 241 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 43957bb37a42..08fd9b12e4d7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,63 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
-lookup_priolist(struct intel_engine_cs *engine, int prio)
-{
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_priolist *p;
-	struct rb_node **parent, *rb;
-	bool first = true;
-
-	if (unlikely(execlists->no_priolist))
-		prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
-	/* most positive priority is scheduled first, equal priorities fifo */
-	rb = NULL;
-	parent = &execlists->queue.rb_root.rb_node;
-	while (*parent) {
-		rb = *parent;
-		p = to_priolist(rb);
-		if (prio > p->priority) {
-			parent = &rb->rb_left;
-		} else if (prio < p->priority) {
-			parent = &rb->rb_right;
-			first = false;
-		} else {
-			return p;
-		}
-	}
-
-	if (prio == I915_PRIORITY_NORMAL) {
-		p = &execlists->default_priolist;
-	} else {
-		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
-		/* Convert an allocation failure to a priority bump */
-		if (unlikely(!p)) {
-			prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
-			/* To maintain ordering with all rendering, after an
-			 * allocation failure we have to disable all scheduling.
-			 * Requests will then be executed in fifo, and schedule
-			 * will ensure that dependencies are emitted in fifo.
-			 * There will be still some reordering with existing
-			 * requests, so if userspace lied about their
-			 * dependencies that reordering may be visible.
-			 */
-			execlists->no_priolist = true;
-			goto find_priolist;
-		}
-	}
-
-	p->priority = prio;
-	INIT_LIST_HEAD(&p->requests);
-	rb_link_node(&p->node, rb, parent);
-	rb_insert_color_cached(&p->node, &execlists->queue, first);
-
-	return p;
-}
-
 static void unwind_wa_tail(struct i915_request *rq)
 {
 	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -324,9 +267,9 @@ static void unwind_wa_tail(struct i915_request *rq)
 
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq, *rn;
-	struct i915_priolist *uninitialized_var(p);
-	int last_prio = I915_PRIORITY_INVALID;
+	struct i915_request *rq, *rn, *active = NULL;
+	struct list_head *uninitialized_var(pl);
+	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -334,19 +277,34 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
-			return;
+			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
+		GEM_BUG_ON(rq->hw_context->active);
+
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != last_prio) {
-			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, last_prio);
+		if (rq_prio(rq) != prio) {
+			prio = rq_prio(rq);
+			pl = i915_sched_lookup_priolist(engine, prio);
 		}
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+		list_add(&rq->sched.link, pl);
 
-		GEM_BUG_ON(p->priority != rq_prio(rq));
-		list_add(&rq->sched.link, &p->requests);
+		active = rq;
+	}
+
+	/*
+	 * The active request is now effectively the start of a new client
+	 * stream, so give it the equivalent small priority bump to prevent
+	 * it being gazumped a second time by another peer.
+	 */
+	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+		prio |= I915_PRIORITY_NEWCLIENT;
+		list_move_tail(&active->sched.link,
+			       i915_sched_lookup_priolist(engine, prio));
 	}
 }
 
@@ -355,13 +313,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__unwind_incomplete_requests(engine);
-
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -394,13 +347,17 @@ execlists_user_end(struct intel_engine_execlists *execlists)
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
+	GEM_BUG_ON(rq->hw_context->active);
+
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(rq->engine);
+	rq->hw_context->active = rq->engine;
 }
 
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
+	rq->hw_context->active = NULL;
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
@@ -417,21 +374,32 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
+	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
 	struct intel_context *ce = rq->hw_context;
-	struct i915_hw_ppgtt *ppgtt =
-		rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
-	/* True 32b PPGTT with dynamic page allocation: update PDP
+	/*
+	 * True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
 	 * PML4 is allocated during ppgtt init, so this is not needed
 	 * in 48-bit mode.
 	 */
-	if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm))
+	if (!i915_vm_is_48bit(&ppgtt->vm))
 		execlists_update_context_pdps(ppgtt, reg_state);
 
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, the empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
 	return ce->lrc_desc;
 }
 
@@ -669,8 +637,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -689,11 +658,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * combine this request with the last, then we
 			 * are done.
 			 */
-			if (port == last_port) {
-				__list_del_many(&p->requests,
-						&rq->sched.link);
+			if (port == last_port)
 				goto done;
-			}
 
 			/*
 			 * If GVT overrides us we only ever submit
@@ -703,11 +669,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * request) to the second port.
 			 */
 			if (ctx_single_port_submission(last->hw_context) ||
-			    ctx_single_port_submission(rq->hw_context)) {
-				__list_del_many(&p->requests,
-						&rq->sched.link);
+			    ctx_single_port_submission(rq->hw_context))
 				goto done;
-			}
 
 			GEM_BUG_ON(last->hw_context == rq->hw_context);
 
@@ -718,15 +681,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
+
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -861,16 +825,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			INIT_LIST_HEAD(&rq->sched.link);
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			list_del_init(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -1076,13 +1040,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link,
-		      &lookup_priolist(engine, prio)->requests);
-}
-
-static void __update_queue(struct intel_engine_cs *engine, int prio)
-{
-	engine->execlists.queue_priority = prio;
+	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
 }
 
 static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -1101,7 +1059,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
 	if (prio > engine->execlists.queue_priority) {
-		__update_queue(engine, prio);
+		engine->execlists.queue_priority = prio;
 		__submit_queue_imm(engine);
 	}
 }
@@ -1124,139 +1082,6 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *sched_to_request(struct i915_sched_node *node)
-{
-	return container_of(node, struct i915_request, sched);
-}
-
-static struct intel_engine_cs *
-sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
-{
-	struct intel_engine_cs *engine = sched_to_request(node)->engine;
-
-	GEM_BUG_ON(!locked);
-
-	if (engine != locked) {
-		spin_unlock(&locked->timeline.lock);
-		spin_lock(&engine->timeline.lock);
-	}
-
-	return engine;
-}
-
-static void execlists_schedule(struct i915_request *request,
-			       const struct i915_sched_attr *attr)
-{
-	struct i915_priolist *uninitialized_var(pl);
-	struct intel_engine_cs *engine, *last;
-	struct i915_dependency *dep, *p;
-	struct i915_dependency stack;
-	const int prio = attr->priority;
-	LIST_HEAD(dfs);
-
-	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
-
-	if (i915_request_completed(request))
-		return;
-
-	if (prio <= READ_ONCE(request->sched.attr.priority))
-		return;
-
-	/* Need BKL in order to use the temporary link inside i915_dependency */
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-
-	stack.signaler = &request->sched;
-	list_add(&stack.dfs_link, &dfs);
-
-	/*
-	 * Recursively bump all dependent priorities to match the new request.
-	 *
-	 * A naive approach would be to use recursion:
-	 * static void update_priorities(struct i915_sched_node *node, prio) {
-	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
-	 *		update_priorities(dep->signal, prio)
-	 *	queue_request(node);
-	 * }
-	 * but that may have unlimited recursion depth and so runs a very
-	 * real risk of overunning the kernel stack. Instead, we build
-	 * a flat list of all dependencies starting with the current request.
-	 * As we walk the list of dependencies, we add all of its dependencies
-	 * to the end of the list (this may include an already visited
-	 * request) and continue to walk onwards onto the new dependencies. The
-	 * end result is a topological list of requests in reverse order, the
-	 * last element in the list is the request we must execute first.
-	 */
-	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		/*
-		 * Within an engine, there can be no cycle, but we may
-		 * refer to the same dependency chain multiple times
-		 * (redundant dependencies are not eliminated) and across
-		 * engines.
-		 */
-		list_for_each_entry(p, &node->signalers_list, signal_link) {
-			GEM_BUG_ON(p == dep); /* no cycles! */
-
-			if (i915_sched_node_signaled(p->signaler))
-				continue;
-
-			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
-			if (prio > READ_ONCE(p->signaler->attr.priority))
-				list_move_tail(&p->dfs_link, &dfs);
-		}
-	}
-
-	/*
-	 * If we didn't need to bump any existing priorities, and we haven't
-	 * yet submitted this request (i.e. there is no potential race with
-	 * execlists_submit_request()), we can set our own priority and skip
-	 * acquiring the engine locks.
-	 */
-	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&request->sched.link));
-		request->sched.attr = *attr;
-		if (stack.dfs_link.next == stack.dfs_link.prev)
-			return;
-		__list_del_entry(&stack.dfs_link);
-	}
-
-	last = NULL;
-	engine = request->engine;
-	spin_lock_irq(&engine->timeline.lock);
-
-	/* Fifo and depth-first replacement ensure our deps execute before us */
-	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_sched_node *node = dep->signaler;
-
-		INIT_LIST_HEAD(&dep->dfs_link);
-
-		engine = sched_lock_engine(node, engine);
-
-		if (prio <= node->attr.priority)
-			continue;
-
-		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			if (last != engine) {
-				pl = lookup_priolist(engine, prio);
-				last = engine;
-			}
-			GEM_BUG_ON(pl->priority != prio);
-			list_move_tail(&node->link, &pl->requests);
-		}
-
-		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			/* defer submission until after all of our updates */
-			__update_queue(engine, prio);
-			tasklet_hi_schedule(&engine->execlists.tasklet);
-		}
-	}
-
-	spin_unlock_irq(&engine->timeline.lock);
-}
-
 static void execlists_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
@@ -1272,6 +1097,28 @@ static void execlists_context_destroy(struct intel_context *ce)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine;
+
+	/*
+	 * The tasklet may still be using a pointer to our state, via an
+	 * old request. However, since we know we only unpin the context
+	 * on retirement of the following request, we know that the last
+	 * request referencing us will have had a completion CS interrupt.
+	 * If we see that it is still active, it means that the tasklet hasn't
+	 * had the chance to run yet; let it run before we teardown the
+	 * reference it may use.
+	 */
+	engine = READ_ONCE(ce->active);
+	if (unlikely(engine)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+		process_csb(engine);
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+		GEM_BUG_ON(READ_ONCE(ce->active));
+	}
+
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 
 	intel_ring_unpin(ce->ring);
@@ -1375,6 +1222,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	GEM_BUG_ON(!ctx->ppgtt);
 
 	if (likely(ce->pin_count++))
 		return ce;
@@ -1679,7 +1527,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	unsigned int i;
 	int ret;
 
-	if (GEM_WARN_ON(engine->id != RCS))
+	if (GEM_DEBUG_WARN_ON(engine->id != RCS))
 		return -EINVAL;
 
 	switch (INTEL_GEN(engine->i915)) {
@@ -1718,8 +1566,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	 */
 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
 		wa_bb[i]->offset = batch_ptr - batch;
-		if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
-					    CACHELINE_BYTES))) {
+		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+						  CACHELINE_BYTES))) {
 			ret = -EINVAL;
 			break;
 		}
@@ -1902,7 +1750,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	unsigned long flags;
 	u32 *regs;
 
-	GEM_TRACE("%s request global=%x, current=%d\n",
+	GEM_TRACE("%s request global=%d, current=%d\n",
 		  engine->name, request ? request->global_seqno : 0,
 		  intel_engine_get_seqno(engine));
 
@@ -2029,8 +1877,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 	 * it is unsafe in case of lite-restore (because the ctx is
 	 * not idle). PML4 is allocated during ppgtt init so this is
 	 * not needed in 48-bit.*/
-	if (rq->gem_context->ppgtt &&
-	    (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
+	if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
 	    !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
 	    !intel_vgpu_active(rq->i915)) {
 		ret = intel_logical_ring_emit_pdps(rq);
@@ -2109,7 +1956,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
 
 	if (mode & EMIT_INVALIDATE) {
 		cmd |= MI_INVALIDATE_TLB;
-		if (request->engine->id == VCS)
+		if (request->engine->class == VIDEO_DECODE_CLASS)
 			cmd |= MI_INVALIDATE_BSD;
 	}
 
@@ -2294,7 +2141,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
 	engine->cancel_requests = execlists_cancel_requests;
-	engine->schedule = execlists_schedule;
+	engine->schedule = i915_schedule;
 	engine->execlists.tasklet.func = execlists_submission_tasklet;
 
 	engine->reset.prepare = execlists_reset_prepare;
@@ -2632,7 +2479,6 @@ static void execlists_init_reg_state(u32 *regs,
 				     struct intel_ring *ring)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
 	u32 base = engine->mmio_base;
 	bool rcs = engine->class == RENDER_CLASS;
 
@@ -2704,12 +2550,12 @@ static void execlists_init_reg_state(u32 *regs,
 	CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
 	CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
 
-	if (ppgtt && i915_vm_is_48bit(&ppgtt->vm)) {
+	if (i915_vm_is_48bit(&ctx->ppgtt->vm)) {
 		/* 64b PPGTT (48bit canonical)
 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
 		 * other PDP Descriptors are ignored.
 		 */
-		ASSIGN_CTX_PML4(ppgtt, regs);
+		ASSIGN_CTX_PML4(ctx->ppgtt, regs);
 	}
 
 	if (rcs) {