author    Qiang Yu <yuq825@gmail.com>    2019-03-09 07:20:12 -0500
committer Eric Anholt <eric@anholt.net>  2019-04-01 13:45:20 -0400
commit    a1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
tree      afee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_sched.c
parent    6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)
drm/lima: driver for ARM Mali4xx GPUs
- Mali 4xx GPUs have two kinds of processors, GP and PP. GP is for OpenGL
  vertex shader processing and PP is for fragment shader processing. Each
  processor has its own MMU, so the processors work in virtual address
  spaces.
- There is only one GP but multiple PPs (max 4 for Mali 400 and 8 for
  Mali 450) in the same Mali 4xx GPU. All PPs are grouped together to
  handle a single fragment shader task, divided by FB output tiled pixels.
  The Mali 400 user space driver is responsible for assigning target tiled
  pixels to each PP, while Mali 450 has a HW module called DLBU to
  dynamically balance each PP's load.
- The user space driver allocates buffer objects and maps them into the
  GPU virtual address space, uploads the command stream and draw data
  through a CPU mmap of the buffer object, then submits a task to GP/PP
  with a register frame that indicates where the command stream is and
  carries misc settings (see the caller-side sketch below).
- There is no command stream validation/relocation, because each user
  process has its own GPU virtual address space. The GP/PP MMU switches
  the virtual address space before running two tasks from different user
  processes. Erroneous or malicious user space code just gets an MMU
  fault or a GP/PP error IRQ, after which the HW/SW is recovered.
- Use GEM+shmem for MM. Currently memory is allocated and pinned at GEM
  object creation, and the GPU VM mapping of the buffer is also done at
  that allocation stage in kernel space. As an improvement we may delay
  the memory allocation and the real GPU VM mapping to the command
  submission stage in the future.
- Use drm_sched for GPU task scheduling. Each OpenGL context should have
  a lima context object in the kernel to distinguish tasks from different
  users. drm_sched picks tasks from each lima context in a fair way.

mesa driver can be found here before upstreamed:
https://gitlab.freedesktop.org/lima/mesa

v8:
- add comments for in_sync
- fix ctx free miss mutex unlock

v7:
- remove lima_fence_ops with default value
- move fence slab create to device probe
- check pad ioctl args to be zero
- add comments for user/kernel interface

v6:
- fix comments by checkpatch.pl

v5:
- export gp/pp version to userspace
- rebase on drm-misc-next

v4:
- use get param interface to get info
- separate context create/free ioctl
- remove unused max sched task param
- update copyright time
- use xarray instead of idr
- stop using drmP.h

v3:
- fix comments from kbuild robot
- restrict supported arch to tested ones

v2:
- fix syscall argument check
- fix job finish fence leak since kernel 5.0
- use drm syncobj to replace native fence
- move buffer object GPU va map into kernel
- reserve syscall argument space for future info
- remove kernel gem modifier
- switch TTM back to GEM+shmem MM
- use time based io poll
- use whole register name
- adopt gem reservation obj integration
- use drm_timeout_abs_to_jiffies

Cc: Eric Anholt <eric@anholt.net>
Cc: Rob Herring <robh@kernel.org>
Cc: Christian König <ckoenig.leichtzumerken@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Simon Shields <simon@lineageos.org>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/291200/
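The scheduler entry points added by this file are driven by the submit ioctl
path elsewhere in the series. The following is a minimal caller-side sketch of
how they fit together; lima_submit_sketch(), its arguments, and the error
handling are illustrative assumptions, not the actual lima_gem.c code. Only
the lima_sched_*() helpers, pipe->task_slab, and the dma-fence calls come from
this patch, and the lima headers are assumed to be in scope.

/* hypothetical caller sketch, not part of the patch */
static int lima_submit_sketch(struct lima_sched_context *ctx,
			      struct lima_sched_pipe *pipe,
			      struct lima_bo **bos, int num_bos,
			      struct lima_vm *vm,
			      struct dma_fence *in_fence)
{
	struct lima_sched_task *task;
	struct dma_fence *done;
	int err;

	/* task memory comes from the pipe's task slab */
	task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
	if (!task)
		return -ENOMEM;

	/* takes a reference on each BO and on the VM, sets up the drm_sched job */
	err = lima_sched_task_init(task, ctx, bos, num_bos, vm);
	if (err) {
		kmem_cache_free(pipe->task_slab, task);
		return err;
	}

	/* optional wait fence, e.g. from an in-sync syncobj; on success the
	 * task owns the reference, on failure it stays with the caller
	 */
	if (in_fence) {
		err = lima_sched_task_add_dep(task, in_fence);
		if (err) {
			lima_sched_task_fini(task);
			kmem_cache_free(pipe->task_slab, task);
			return err;
		}
	}

	/* hand the job to drm_sched; "done" is the finished fence */
	done = lima_sched_context_queue_task(ctx, task);
	dma_fence_put(done);
	return 0;
}

The real driver is expected to install the finished fence in the BOs'
reservation objects and in a user-visible syncobj instead of dropping it
immediately as the sketch does.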
Diffstat (limited to 'drivers/gpu/drm/lima/lima_sched.c')
-rw-r--r--  drivers/gpu/drm/lima/lima_sched.c  404
1 file changed, 404 insertions, 0 deletions
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..97bd9c1deb87
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,404 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/kthread.h>
#include <linux/slab.h>

#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_object.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;

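/*
 * All lima devices share a single fence slab; the refcount below lets
 * each caller of lima_sched_slab_init() pair it with a fini without
 * tearing the cache down while another device is still using it.
 */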
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};

static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}

int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->gem);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		kfree(task->bos);
		return err;
	}

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);
	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	int i;

	drm_sched_job_cleanup(&task->base);

	for (i = 0; i < task->num_dep; i++)
		dma_fence_put(task->dep[i]);

	kfree(task->dep);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put_unlocked(&task->bos[i]->gem);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

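/*
 * Track a fence this task must wait on before it can run. A fence from
 * the task's own scheduler context is dropped right away, and a later
 * fence from an already tracked context replaces the stored one. The
 * dep array grows on demand; on success the task owns (or has released)
 * the reference passed in.
 */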
int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
{
	int i, new_dep = 4;

	/* a fence from the same context is guaranteed to signal before this task */
	if (fence->context == task->base.s_fence->finished.context) {
		dma_fence_put(fence);
		return 0;
	}

	if (task->dep && task->num_dep == task->max_dep)
		new_dep = task->max_dep * 2;

	if (task->max_dep < new_dep) {
		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);

		if (!dep)
			return -ENOMEM;

		task->max_dep = new_dep;
		task->dep = dep;
	}

	for (i = 0; i < task->num_dep; i++) {
		if (task->dep[i]->context == fence->context &&
		    dma_fence_is_later(fence, task->dep[i])) {
			dma_fence_put(task->dep[i]);
			task->dep[i] = fence;
			return 0;
		}
	}

	task->dep[task->num_dep++] = fence;
	return 0;
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;

	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_fini(&context->base);
}

struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
						struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	drm_sched_entity_push_job(&task->base, &context->base);
	return fence;
}

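/*
 * drm_sched ->dependency callback: return the next dependency fence that
 * has not signaled yet (ownership passes to the scheduler) and drop the
 * ones that already have. Returning NULL means the task is ready to run.
 */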
static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
					       struct drm_sched_entity *entity)
{
	struct lima_sched_task *task = to_lima_task(job);
	int i;

	for (i = 0; i < task->num_dep; i++) {
		struct dma_fence *fence = task->dep[i];

		if (!task->dep[i])
			continue;

		task->dep[i] = NULL;

		if (!dma_fence_is_signaled(fence))
			return fence;

		dma_fence_put(fence);
	}

	return NULL;
}

static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_fence *fence;
	struct dma_fence *ret;
	struct lima_vm *vm = NULL, *last_vm = NULL;
	int i;

	/* after GPU reset */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;
	task->fence = &fence->base;

	/* take an extra reference for the caller, otherwise the irq
	 * handler may consume the fence before the caller gets to use it
	 */
	ret = dma_fence_get(task->fence);

	pipe->current_task = task;

	/* this is needed for the MMU to work correctly, otherwise GP/PP
	 * will hang or page fault for an unknown reason after running
	 * for a while.
	 *
	 * Need to investigate:
	 * 1. is it related to the TLB?
	 * 2. how much performance is affected by the L2 cache flush?
	 * 3. can we reduce the calls to this function, since all
	 *    GP/PP share the same L2 cache on mali400?
	 *
	 * TODO:
	 * 1. move this to task fini to save some wait time?
	 * 2. when GP/PP use different L2 caches, does PP need to wait
	 *    for the GP L2 cache flush?
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	if (task->vm != pipe->current_vm) {
		vm = lima_vm_get(task->vm);
		last_vm = pipe->current_vm;
		pipe->current_vm = task->vm;
	}

	if (pipe->bcast_mmu)
		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
	else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], vm);
	}

	if (last_vm)
		lima_vm_put(last_vm);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}

static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
					 struct lima_sched_task *task)
{
	drm_sched_stop(&pipe->base);

	if (task)
		drm_sched_increase_karma(&task->base);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu)
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	if (pipe->current_vm)
		lima_vm_put(pipe->current_vm);

	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);
}

static void lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);

	DRM_ERROR("lima job timeout\n");

	lima_sched_handle_error_task(pipe, task);
}

static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

const struct drm_sched_backend_ops lima_sched_ops = {
	.dependency = lima_sched_dependency,
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};

static void lima_sched_error_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, error_work);
	struct lima_sched_task *task = pipe->current_task;

	lima_sched_handle_error_task(pipe, task);
}

int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	long timeout;

	if (lima_sched_timeout_ms <= 0)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = msecs_to_jiffies(lima_sched_timeout_ms);

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->error_work, lima_sched_error_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}

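/*
 * Completion entry point for the hardware backends (GP/PP interrupt
 * handling): queue the recovery work if the pipe flagged an error,
 * otherwise run the backend's task_fini() hook and signal the task's
 * hardware fence so drm_sched can finish and free the job.
 */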
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	if (pipe->error)
		schedule_work(&pipe->error_work);
	else {
		struct lima_sched_task *task = pipe->current_task;

		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);
	}
}