Diffstat (limited to 'drivers/gpu/drm/i915/i915_scheduler.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_scheduler.c | 399
1 file changed, 399 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
new file mode 100644
index 000000000000..340faea6c08a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -0,0 +1,399 @@
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return container_of(node, const struct i915_request, sched);
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);
	node->attr.priority = I915_PRIORITY_INVALID;
}

static struct i915_dependency *
i915_dependency_alloc(struct drm_i915_private *i915)
{
	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct drm_i915_private *i915,
		     struct i915_dependency *dep)
{
	kmem_cache_free(i915->dependencies, dep);
}

bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		list_add(&dep->wait_link, &signal->waiters_list);
		list_add(&dep->signal_link, &node->signalers_list);
		dep->signaler = signal;
		dep->flags = flags;

		ret = true;
	}

	spin_unlock(&schedule_lock);

	return ret;
}

int i915_sched_node_add_dependency(struct drm_i915_private *i915,
				   struct i915_sched_node *node,
				   struct i915_sched_node *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc(i915);
	if (!dep)
		return -ENOMEM;

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      I915_DEPENDENCY_ALLOC))
		i915_dependency_free(i915, dep);

	return 0;
}
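
/*
 * Hypothetical caller sketch (illustration only; 'to' and 'from' are
 * assumed request pointers, not part of this file): when request 'to'
 * must wait for request 'from', request construction would record the
 * edge with
 *
 *	i915_sched_node_add_dependency(to->i915, &to->sched, &from->sched);
 *
 * so that a later priority bump of 'to' also propagates to 'from'.
 */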

void i915_sched_node_fini(struct drm_i915_private *i915,
			  struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	GEM_BUG_ON(!list_empty(&node->link));

	spin_lock(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!node_signaled(dep->signaler));
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	spin_unlock(&schedule_lock);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static void assert_priolists(struct intel_engine_execlists * const execlists,
			     long queue_priority)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority >= last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

	lockdep_assert_held(&engine->timeline.lock);
	assert_priolists(execlists, INT_MAX);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
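	/*
	 * Worked example for the two statements above (hypothetical
	 * values; the real constants come from i915_scheduler.h): if
	 * I915_USER_PRIORITY_SHIFT were 2, giving I915_PRIORITY_COUNT == 4
	 * and I915_PRIORITY_MASK == 3, then prio == 0b1011 splits into
	 * internal bucket 0b11 (so idx = 4 - 3 - 1 = 0, the highest slot)
	 * and user priority 0b10.
	 */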
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}
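
/*
 * Hypothetical usage sketch (illustration only): a submission path
 * holding engine->timeline.lock would queue a request onto the bucket
 * returned for its effective priority, e.g.
 *
 *	list_add_tail(&rq->sched.link,
 *		      i915_sched_lookup_priolist(engine, prio));
 */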

static struct intel_engine_cs *
sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
{
	struct intel_engine_cs *engine = node_to_request(node)->engine;

	GEM_BUG_ON(!locked);

	if (engine != locked) {
		spin_unlock(&locked->timeline.lock);
		spin_lock(&engine->timeline.lock);
	}

	return engine;
}

static void __i915_schedule(struct i915_request *rq,
			    const struct i915_sched_attr *attr)
{
	struct list_head *uninitialized_var(pl);
	struct intel_engine_cs *engine, *last;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	const int prio = attr->priority;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (i915_request_completed(rq))
		return;

	if (prio <= READ_ONCE(rq->sched.attr.priority))
		return;

	stack.signaler = &rq->sched;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&rq->sched.link));
		rq->sched.attr = *attr;

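		/*
		 * With only &stack on the local dfs list, its next and
		 * prev both point back at the list head, i.e. no other
		 * node needed a priority bump.
		 */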
		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	last = NULL;
	engine = rq->engine;
	spin_lock_irq(&engine->timeline.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		INIT_LIST_HEAD(&dep->dfs_link);

		engine = sched_lock_engine(node, engine);

		/* Recheck after acquiring the engine->timeline.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		node->attr.priority = prio;
		if (!list_empty(&node->link)) {
			if (last != engine) {
				pl = i915_sched_lookup_priolist(engine, prio);
				last = engine;
			}
			list_move_tail(&node->link, pl);
		} else {
			/*
			 * If the request is not in the priolist queue because
			 * it is not yet runnable, then it doesn't contribute
			 * to our preemption decisions. On the other hand,
			 * if the request is on the HW, it too is not in the
			 * queue; but in that case we may still need to reorder
			 * the inflight requests.
			 */
			if (!i915_sw_fence_done(&node_to_request(node)->submit))
				continue;
		}

		if (prio <= engine->execlists.queue_priority)
			continue;

		/*
		 * If we are already the currently executing context, don't
		 * bother evaluating if we should preempt ourselves.
		 */
		if (node_to_request(node)->global_seqno &&
		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
				      node_to_request(node)->global_seqno))
			continue;

		/* Defer (tasklet) submission until after all of our updates. */
		engine->execlists.queue_priority = prio;
		tasklet_hi_schedule(&engine->execlists.tasklet);
	}

	spin_unlock_irq(&engine->timeline.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock(&schedule_lock);
	__i915_schedule(rq, attr);
	spin_unlock(&schedule_lock);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	struct i915_sched_attr attr;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);

	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
		return;

	spin_lock_bh(&schedule_lock);

	attr = rq->sched.attr;
	attr.priority |= bump;
	__i915_schedule(rq, &attr);

	spin_unlock_bh(&schedule_lock);
}
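
For context, a minimal caller sketch for the exported entry point above (illustration only; example_set_priority and its call site are hypothetical and not part of this patch):

static void example_set_priority(struct i915_request *rq, int prio)
{
	/* any effective priority above rq's current one triggers a bump */
	struct i915_sched_attr attr = { .priority = prio };

	i915_schedule(rq, &attr);
}

i915_schedule_bump_priority() serves the same purpose, but instead of taking a full i915_sched_attr it ORs extra internal priority bits (within I915_PRIORITY_MASK) into the request's existing attribute.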