aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/lima/lima_gem.c
diff options
context:
space:
mode:
authorQiang Yu <yuq825@gmail.com>2019-03-09 07:20:12 -0500
committerEric Anholt <eric@anholt.net>2019-04-01 13:45:20 -0400
commita1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
treeafee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_gem.c
parent6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)
drm/lima: driver for ARM Mali4xx GPUs
- Mali 4xx GPUs have two kinds of processors, GP and PP. GP is for OpenGL vertex shader processing and PP is for fragment shader processing. Each processor has its own MMU so processors work in virtual address space. - There's only one GP but multiple PPs (max 4 for mali 400 and 8 for mali 450) in the same mali 4xx GPU. All PPs are grouped together to handle a single fragment shader task divided by FB output tiled pixels. The Mali 400 user space driver is responsible for assigning target tiled pixels to each PP, but mali 450 has a HW module called DLBU to dynamically balance each PP's load. - The user space driver allocates buffer objects and maps them into GPU virtual address space, uploads the command stream and draw data with a CPU mmap of the buffer object, then submits the task to GP/PP with a register frame indicating where the command stream is and misc settings. - There's no command stream validation/relocation because each user process has its own GPU virtual address space. GP/PP's MMU switches virtual address space before running two tasks from different user processes. Erroneous or evil user space code just gets an MMU fault or GP/PP error IRQ, then the HW/SW will be recovered. - Use GEM+shmem for MM. Currently we just alloc and pin memory at gem object creation. The GPU vm map of the buffer is also done in the alloc stage in kernel space. We may delay the memory allocation and real GPU vm map to command submission stage in the future as an improvement. - Use drm_sched for GPU task scheduling. Each OpenGL context should have a lima context object in the kernel to distinguish tasks from different users. drm_sched gets tasks from each lima context in a fair way. 
mesa driver can be found here before upstreamed: https://gitlab.freedesktop.org/lima/mesa v8: - add comments for in_sync - fix ctx free miss mutex unlock v7: - remove lima_fence_ops with default value - move fence slab create to device probe - check pad ioctl args to be zero - add comments for user/kernel interface v6: - fix comments by checkpatch.pl v5: - export gp/pp version to userspace - rebase on drm-misc-next v4: - use get param interface to get info - separate context create/free ioctl - remove unused max sched task param - update copyright time - use xarray instead of idr - stop using drmP.h v3: - fix comments from kbuild robot - restrict supported arch to tested ones v2: - fix syscall argument check - fix job finish fence leak since kernel 5.0 - use drm syncobj to replace native fence - move buffer object GPU va map into kernel - reserve syscall argument space for future info - remove kernel gem modifier - switch TTM back to GEM+shmem MM - use time based io poll - use whole register name - adopt gem reservation obj integration - use drm_timeout_abs_to_jiffies Cc: Eric Anholt <eric@anholt.net> Cc: Rob Herring <robh@kernel.org> Cc: Christian König <ckoenig.leichtzumerken@gmail.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Rob Clark <robdclark@gmail.com> Cc: Dave Airlie <airlied@gmail.com> Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de> Signed-off-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Heiko Stuebner <heiko@sntech.de> Signed-off-by: Marek Vasut <marex@denx.de> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Simon Shields <simon@lineageos.org> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Rob Herring <robh@kerrnel.org> Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/291200/
Diffstat (limited to 'drivers/gpu/drm/lima/lima_gem.c')
-rw-r--r--drivers/gpu/drm/lima/lima_gem.c381
1 files changed, 381 insertions, 0 deletions
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
new file mode 100644
index 000000000000..2d3cf96f6c58
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -0,0 +1,381 @@
1// SPDX-License-Identifier: GPL-2.0 OR MIT
2/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
3
4#include <linux/sync_file.h>
5#include <linux/pfn_t.h>
6
7#include <drm/drm_file.h>
8#include <drm/drm_syncobj.h>
9#include <drm/drm_utils.h>
10
11#include <drm/lima_drm.h>
12
13#include "lima_drv.h"
14#include "lima_gem.h"
15#include "lima_gem_prime.h"
16#include "lima_vm.h"
17#include "lima_object.h"
18
19int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
20 u32 size, u32 flags, u32 *handle)
21{
22 int err;
23 struct lima_bo *bo;
24 struct lima_device *ldev = to_lima_dev(dev);
25
26 bo = lima_bo_create(ldev, size, flags, NULL, NULL);
27 if (IS_ERR(bo))
28 return PTR_ERR(bo);
29
30 err = drm_gem_handle_create(file, &bo->gem, handle);
31
32 /* drop reference from allocate - handle holds it now */
33 drm_gem_object_put_unlocked(&bo->gem);
34
35 return err;
36}
37
38void lima_gem_free_object(struct drm_gem_object *obj)
39{
40 struct lima_bo *bo = to_lima_bo(obj);
41
42 if (!list_empty(&bo->va))
43 dev_err(obj->dev->dev, "lima gem free bo still has va\n");
44
45 lima_bo_destroy(bo);
46}
47
48int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
49{
50 struct lima_bo *bo = to_lima_bo(obj);
51 struct lima_drm_priv *priv = to_lima_drm_priv(file);
52 struct lima_vm *vm = priv->vm;
53
54 return lima_vm_bo_add(vm, bo, true);
55}
56
57void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
58{
59 struct lima_bo *bo = to_lima_bo(obj);
60 struct lima_drm_priv *priv = to_lima_drm_priv(file);
61 struct lima_vm *vm = priv->vm;
62
63 lima_vm_bo_del(vm, bo);
64}
65
66int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
67{
68 struct drm_gem_object *obj;
69 struct lima_bo *bo;
70 struct lima_drm_priv *priv = to_lima_drm_priv(file);
71 struct lima_vm *vm = priv->vm;
72 int err;
73
74 obj = drm_gem_object_lookup(file, handle);
75 if (!obj)
76 return -ENOENT;
77
78 bo = to_lima_bo(obj);
79
80 *va = lima_vm_get_va(vm, bo);
81
82 err = drm_gem_create_mmap_offset(obj);
83 if (!err)
84 *offset = drm_vma_node_offset_addr(&obj->vma_node);
85
86 drm_gem_object_put_unlocked(obj);
87 return err;
88}
89
/* Page-fault handler for CPU mmaps of a lima BO: insert the backing
 * page for the faulting address into the VMA (VM_MIXEDMAP mapping set
 * up by lima_set_vma_flags()).
 */
static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct drm_gem_object *obj = vma->vm_private_data;
	struct lima_bo *bo = to_lima_bo(obj);
	pfn_t pfn;
	pgoff_t pgoff;

	/* We don't use vmf->pgoff since that has the fake offset: */
	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
	/* NOTE(review): no range check on pgoff here — presumably
	 * drm_gem_mmap() limits the VMA to the object size and
	 * bo->pages covers the whole object; confirm both hold.
	 */
	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);

	return vmf_insert_mixed(vma, vmf->address, pfn);
}
104
/* VM operations for user mmaps of lima BOs; open/close are the
 * generic GEM helpers that keep the object refcounted while mapped.
 */
const struct vm_operations_struct lima_gem_vm_ops = {
	.fault = lima_gem_fault,
	.open = drm_gem_vm_open,
	.close = drm_gem_vm_close,
};
110
111void lima_set_vma_flags(struct vm_area_struct *vma)
112{
113 pgprot_t prot = vm_get_page_prot(vma->vm_flags);
114
115 vma->vm_flags |= VM_MIXEDMAP;
116 vma->vm_flags &= ~VM_PFNMAP;
117 vma->vm_page_prot = pgprot_writecombine(prot);
118}
119
/* mmap entry point: defer to the generic GEM mmap helper, then adjust
 * the VMA for lima's fault-based, write-combined mapping.
 */
int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int err = drm_gem_mmap(filp, vma);

	if (!err)
		lima_set_vma_flags(vma);

	return err;
}
131
/* Set up synchronization between @task and one buffer @bo.
 *
 * For reads a shared-fence slot is always reserved so the task's own
 * fence can be added after queuing.  With explicit sync the user
 * supplies dependencies via syncobjs, so nothing more is done here.
 * With implicit sync the task gains dependencies on fences already in
 * the BO's reservation object: every fence for a write, only the
 * exclusive (writer) fence for a read.
 *
 * Returns 0 on success or a negative error code.
 */
static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
			    bool write, bool explicit)
{
	int err = 0;

	if (!write) {
		err = reservation_object_reserve_shared(bo->gem.resv, 1);
		if (err)
			return err;
	}

	/* explicit sync use user passed dep fence */
	if (explicit)
		return 0;

	/* implicit sync use bo fence in resv obj */
	if (write) {
		unsigned nr_fences;
		struct dma_fence **fences;
		int i;

		/* snapshot all current fences (readers + writer) */
		err = reservation_object_get_fences_rcu(
			bo->gem.resv, NULL, &nr_fences, &fences);
		if (err || !nr_fences)
			return err;

		for (i = 0; i < nr_fences; i++) {
			/* on success the task takes over the fence ref;
			 * on failure the cleanup loop below drops it
			 */
			err = lima_sched_task_add_dep(task, fences[i]);
			if (err)
				break;
		}

		/* for error case free remaining fences */
		for ( ; i < nr_fences; i++)
			dma_fence_put(fences[i]);

		kfree(fences);
	} else {
		struct dma_fence *fence;

		/* a read only has to wait for the last writer */
		fence = reservation_object_get_excl_rcu(bo->gem.resv);
		if (fence) {
			err = lima_sched_task_add_dep(task, fence);
			if (err)
				dma_fence_put(fence);
		}
	}

	return err;
}
182
/* Lock the reservation objects of all @nr_bos buffers under one
 * wound/wait acquire context so concurrent submits over overlapping
 * BO sets cannot deadlock.
 *
 * If ww_mutex_lock_interruptible() reports -EDEADLK, every lock held
 * so far is dropped, the contended lock is taken with the blocking
 * slow path, and the whole set is retried, skipping the lock we now
 * already hold.  (No double unlock is possible: slow_locked is reset
 * to -1 as soon as the retry pass walks past it, so the err path
 * either covers it in the loop or via the explicit unlock, never
 * both.)
 *
 * Returns 0 with all locks held and the context marked done, or a
 * negative error with everything released and the context finished.
 */
static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
			     struct ww_acquire_ctx *ctx)
{
	int i, ret = 0, contended, slow_locked = -1;

	ww_acquire_init(ctx, &reservation_ww_class);

retry:
	for (i = 0; i < nr_bos; i++) {
		/* already taken via the slow path on a previous pass */
		if (i == slow_locked) {
			slow_locked = -1;
			continue;
		}

		ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
		if (ret < 0) {
			contended = i;
			goto err;
		}
	}

	ww_acquire_done(ctx);
	return 0;

err:
	/* release everything locked earlier in this pass */
	for (i--; i >= 0; i--)
		ww_mutex_unlock(&bos[i]->gem.resv->lock);

	/* slow-path lock beyond i is not covered by the loop above */
	if (slow_locked >= 0)
		ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);

	if (ret == -EDEADLK) {
		/* we lost out in a seqno race, lock and retry.. */
		ret = ww_mutex_lock_slow_interruptible(
			&bos[contended]->gem.resv->lock, ctx);
		if (!ret) {
			slow_locked = contended;
			goto retry;
		}
	}
	ww_acquire_fini(ctx);

	return ret;
}
227
228static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
229 struct ww_acquire_ctx *ctx)
230{
231 int i;
232
233 for (i = 0; i < nr_bos; i++)
234 ww_mutex_unlock(&bos[i]->gem.resv->lock);
235 ww_acquire_fini(ctx);
236}
237
238static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
239{
240 int i, err;
241
242 for (i = 0; i < ARRAY_SIZE(submit->in_sync); i++) {
243 struct dma_fence *fence = NULL;
244
245 if (!submit->in_sync[i])
246 continue;
247
248 err = drm_syncobj_find_fence(file, submit->in_sync[i],
249 0, 0, &fence);
250 if (err)
251 return err;
252
253 err = lima_sched_task_add_dep(submit->task, fence);
254 if (err) {
255 dma_fence_put(fence);
256 return err;
257 }
258 }
259
260 return 0;
261}
262
/* Validate and queue one GP/PP task described by @submit.
 *
 * Sequence: resolve the optional out-sync syncobj, look up every BO
 * handle and pin its GPU VA mapping, ww-lock all reservation objects,
 * initialize the scheduler task, add explicit (syncobj) dependencies,
 * add per-BO implicit/explicit sync, queue the task, then publish the
 * task's fence to each BO's reservation object and the out syncobj.
 *
 * Returns 0 on success or a negative error code; on failure all BO
 * references and VA pins taken here are released again.
 */
int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
{
	int i, err = 0;
	struct ww_acquire_ctx ctx;
	struct lima_drm_priv *priv = to_lima_drm_priv(file);
	struct lima_vm *vm = priv->vm;
	struct drm_syncobj *out_sync = NULL;
	struct dma_fence *fence;
	struct lima_bo **bos = submit->lbos;

	if (submit->out_sync) {
		out_sync = drm_syncobj_find(file, submit->out_sync);
		if (!out_sync)
			return -ENOENT;
	}

	/* NOTE(review): the err_out0 unwind stops at the first NULL
	 * entry, so submit->lbos is assumed to arrive zeroed — confirm
	 * in the ioctl caller.
	 */
	for (i = 0; i < submit->nr_bos; i++) {
		struct drm_gem_object *obj;
		struct lima_bo *bo;

		obj = drm_gem_object_lookup(file, submit->bos[i].handle);
		if (!obj) {
			err = -ENOENT;
			goto err_out0;
		}

		bo = to_lima_bo(obj);

		/* increase refcnt of gpu va map to prevent unmapped when executing,
		 * will be decreased when task done
		 */
		err = lima_vm_bo_add(vm, bo, false);
		if (err) {
			drm_gem_object_put_unlocked(obj);
			goto err_out0;
		}

		bos[i] = bo;
	}

	err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
	if (err)
		goto err_out0;

	err = lima_sched_task_init(
		submit->task, submit->ctx->context + submit->pipe,
		bos, submit->nr_bos, vm);
	if (err)
		goto err_out1;

	/* explicit dependencies from user syncobjs */
	err = lima_gem_add_deps(file, submit);
	if (err)
		goto err_out2;

	/* per-BO implicit (resv) or explicit synchronization */
	for (i = 0; i < submit->nr_bos; i++) {
		err = lima_gem_sync_bo(
			submit->task, bos[i],
			submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
			submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
		if (err)
			goto err_out2;
	}

	fence = lima_sched_context_queue_task(
		submit->ctx->context + submit->pipe, submit->task);

	/* make the task's fence visible for implicit sync by others:
	 * exclusive for writers, shared for readers
	 */
	for (i = 0; i < submit->nr_bos; i++) {
		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
			reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
		else
			reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
	}

	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);

	/* drop the lookup refs; VA pins stay until the task is done */
	for (i = 0; i < submit->nr_bos; i++)
		drm_gem_object_put_unlocked(&bos[i]->gem);

	if (out_sync) {
		drm_syncobj_replace_fence(out_sync, fence);
		drm_syncobj_put(out_sync);
	}

	dma_fence_put(fence);

	return 0;

err_out2:
	lima_sched_task_fini(submit->task);
err_out1:
	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
err_out0:
	for (i = 0; i < submit->nr_bos; i++) {
		if (!bos[i])
			break;
		lima_vm_bo_del(vm, bos[i]);
		drm_gem_object_put_unlocked(&bos[i]->gem);
	}
	if (out_sync)
		drm_syncobj_put(out_sync);
	return err;
}
365
366int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
367{
368 bool write = op & LIMA_GEM_WAIT_WRITE;
369 long ret, timeout;
370
371 if (!op)
372 return 0;
373
374 timeout = drm_timeout_abs_to_jiffies(timeout_ns);
375
376 ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
377 if (ret == 0)
378 ret = timeout ? -ETIMEDOUT : -EBUSY;
379
380 return ret;
381}