Diffstat (limited to 'drivers/gpu/drm/lima/lima_sched.c')
 drivers/gpu/drm/lima/lima_sched.c | 404 +++++++++++++++++++++++++++++++++++++
 1 file changed, 404 insertions(+)
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..97bd9c1deb87
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,404 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/kthread.h>
#include <linux/slab.h>

#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_object.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;

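/* The fence slab is shared, so its creation is refcounted: each successful
 * lima_sched_slab_init() call must be balanced by lima_sched_slab_fini().
 */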
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

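/* dma_fence users may still hold RCU-protected references to the fence,
 * so only free its memory after an RCU grace period.
 */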
static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};

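/* Allocate a hardware fence on the pipe's own timeline: each pipe has its
 * own fence context and a monotonically increasing seqno.
 */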
static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}

int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->gem);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		kfree(task->bos);
		return err;
	}

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);
	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	int i;

	drm_sched_job_cleanup(&task->base);

	for (i = 0; i < task->num_dep; i++)
		dma_fence_put(task->dep[i]);

	kfree(task->dep);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put_unlocked(&task->bos[i]->gem);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

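/* Add a dependency fence to the task; on success the task owns the fence
 * reference. A fence from the task's own context is dropped immediately,
 * and only the latest fence per foreign context is kept.
 */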
int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
{
	int i, new_dep = 4;

	/* a fence from the same context is definitely earlier than this task */
	if (fence->context == task->base.s_fence->finished.context) {
		dma_fence_put(fence);
		return 0;
	}

	if (task->dep && task->num_dep == task->max_dep)
		new_dep = task->max_dep * 2;

	if (task->max_dep < new_dep) {
		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);

		if (!dep)
			return -ENOMEM;

		task->max_dep = new_dep;
		task->dep = dep;
	}

	for (i = 0; i < task->num_dep; i++) {
		if (task->dep[i]->context == fence->context &&
		    dma_fence_is_later(fence, task->dep[i])) {
			dma_fence_put(task->dep[i]);
			task->dep[i] = fence;
			return 0;
		}
	}

	task->dep[task->num_dep++] = fence;
	return 0;
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;

	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_fini(&context->base);
}

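/* Push the task to the scheduler entity and return a reference to its
 * "finished" fence for the caller's use (e.g. to hand back to userspace).
 */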
struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
						struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	drm_sched_entity_push_job(&task->base, &context->base);
	return fence;
}

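/* drm_sched callback: hand the scheduler the next dependency fence that
 * has not signaled yet; return NULL once all dependencies are done.
 */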
static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
					       struct drm_sched_entity *entity)
{
	struct lima_sched_task *task = to_lima_task(job);
	int i;

	for (i = 0; i < task->num_dep; i++) {
		struct dma_fence *fence = task->dep[i];

		if (!task->dep[i])
			continue;

		task->dep[i] = NULL;

		if (!dma_fence_is_signaled(fence))
			return fence;

		dma_fence_put(fence);
	}

	return NULL;
}

static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_fence *fence;
	struct dma_fence *ret;
	struct lima_vm *vm = NULL, *last_vm = NULL;
	int i;

	/* the task carries an error, e.g. it was marked guilty after a
	 * GPU reset, so don't run it on the hardware
	 */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;
	task->fence = &fence->base;

	/* take an extra reference for the caller's use of the fence;
	 * otherwise the IRQ handler may consume the fence before the
	 * caller uses it
	 */
	ret = dma_fence_get(task->fence);

	pipe->current_task = task;

	/* this is needed for the MMU to work correctly; otherwise GP/PP
	 * will hang or page fault for unknown reasons after running for
	 * a while
	 *
	 * Need to investigate:
	 * 1. is it related to the TLB?
	 * 2. how much performance is affected by the L2 cache flush?
	 * 3. can we reduce the calls to this function, since all GP/PP
	 *    share the same L2 cache on mali400?
	 *
	 * TODO:
	 * 1. move this to task fini to save some wait time?
	 * 2. when GP and PP use different L2 caches, does PP need to
	 *    wait for GP's L2 cache flush?
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

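	/* switch the MMUs to this task's VM; keep a reference to the
	 * outgoing VM until the switch is done so its page tables stay
	 * valid in the meantime
	 */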
	if (task->vm != pipe->current_vm) {
		vm = lima_vm_get(task->vm);
		last_vm = pipe->current_vm;
		pipe->current_vm = task->vm;
	}

	if (pipe->bcast_mmu)
		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
	else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], vm);
	}

	if (last_vm)
		lima_vm_put(last_vm);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}

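/* Common recovery path for job timeouts and hardware errors: stop the
 * scheduler, run the pipe's error handler, resume any pending MMU page
 * fault, drop the current VM and task, then resubmit the remaining jobs
 * and restart the scheduler.
 */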
static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
					 struct lima_sched_task *task)
{
	drm_sched_stop(&pipe->base);

	if (task)
		drm_sched_increase_karma(&task->base);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu)
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	if (pipe->current_vm)
		lima_vm_put(pipe->current_vm);

	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);
}

static void lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);

	DRM_ERROR("lima job timeout\n");

	lima_sched_handle_error_task(pipe, task);
}

static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

const struct drm_sched_backend_ops lima_sched_ops = {
	.dependency = lima_sched_dependency,
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};

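/* Hardware errors are recovered from process context: the completion path
 * (lima_sched_pipe_task_done() below) only schedules this work.
 */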
static void lima_sched_error_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, error_work);
	struct lima_sched_task *task = pipe->current_task;

	lima_sched_handle_error_task(pipe, task);
}

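/* lima_sched_timeout_ms is a module parameter (defined in lima_drv.c);
 * zero or a negative value disables the scheduler job timeout.
 */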
int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	long timeout;

	if (lima_sched_timeout_ms <= 0)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = msecs_to_jiffies(lima_sched_timeout_ms);

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->error_work, lima_sched_error_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}

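/* Completion hook for the pipe, typically called from its IRQ handler:
 * on a hardware error, defer recovery to the error work since recovery
 * may sleep; otherwise finish the task on the hardware and signal its
 * fence.
 */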
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	if (pipe->error)
		schedule_work(&pipe->error_work);
	else {
		struct lima_sched_task *task = pipe->current_task;

		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);
	}
}