aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/lima/lima_pp.c
diff options
context:
space:
mode:
authorQiang Yu <yuq825@gmail.com>2019-03-09 07:20:12 -0500
committerEric Anholt <eric@anholt.net>2019-04-01 13:45:20 -0400
commita1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
treeafee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_pp.c
parent6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)
drm/lima: driver for ARM Mali4xx GPUs
- Mali 4xx GPUs have two kinds of processors, GP and PP. GP is for OpenGL vertex shader processing and PP is for fragment shader processing. Each processor has its own MMU, so processors work in a virtual address space. - There's only one GP but multiple PPs (max 4 for mali 400 and 8 for mali 450) in the same mali 4xx GPU. All PPs are grouped together to handle a single fragment shader task divided by FB output tiled pixels. The Mali 400 user space driver is responsible for assigning target tiled pixels to each PP, but mali 450 has a HW module called DLBU to dynamically balance each PP's load. - The user space driver allocates a buffer object and maps it into the GPU virtual address space, uploads the command stream and draw data through a CPU mmap of the buffer object, then submits a task to GP/PP with a register frame indicating where the command stream is, plus misc settings. - There's no command stream validation/relocation because each user process has its own GPU virtual address space. The GP/PP MMU switches virtual address space before running two tasks from different user processes. Erroneous or evil user space code just gets an MMU fault or a GP/PP error IRQ, then the HW/SW will be recovered. - Use GEM+shmem for MM. Currently memory is just allocated and pinned at gem object creation. The GPU vm map of the buffer is also done in the alloc stage in kernel space. We may delay the memory allocation and the real GPU vm map to the command submission stage in the future as an improvement. - Use drm_sched for GPU task scheduling. Each OpenGL context should have a lima context object in the kernel to distinguish tasks from different users. drm_sched gets tasks from each lima context in a fair way. 
mesa driver can be found here before upstreamed: https://gitlab.freedesktop.org/lima/mesa v8: - add comments for in_sync - fix ctx free miss mutex unlock v7: - remove lima_fence_ops with default value - move fence slab create to device probe - check pad ioctl args to be zero - add comments for user/kernel interface v6: - fix comments by checkpatch.pl v5: - export gp/pp version to userspace - rebase on drm-misc-next v4: - use get param interface to get info - separate context create/free ioctl - remove unused max sched task param - update copyright time - use xarray instead of idr - stop using drmP.h v3: - fix comments from kbuild robot - restrict supported arch to tested ones v2: - fix syscall argument check - fix job finish fence leak since kernel 5.0 - use drm syncobj to replace native fence - move buffer object GPU va map into kernel - reserve syscall argument space for future info - remove kernel gem modifier - switch TTM back to GEM+shmem MM - use time based io poll - use whole register name - adopt gem reservation obj integration - use drm_timeout_abs_to_jiffies Cc: Eric Anholt <eric@anholt.net> Cc: Rob Herring <robh@kernel.org> Cc: Christian König <ckoenig.leichtzumerken@gmail.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Rob Clark <robdclark@gmail.com> Cc: Dave Airlie <airlied@gmail.com> Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de> Signed-off-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Heiko Stuebner <heiko@sntech.de> Signed-off-by: Marek Vasut <marex@denx.de> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Simon Shields <simon@lineageos.org> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Rob Herring <robh@kerrnel.org> Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/291200/
Diffstat (limited to 'drivers/gpu/drm/lima/lima_pp.c')
-rw-r--r--drivers/gpu/drm/lima/lima_pp.c427
1 files changed, 427 insertions, 0 deletions
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
new file mode 100644
index 000000000000..d29721e177bf
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -0,0 +1,427 @@
1// SPDX-License-Identifier: GPL-2.0 OR MIT
2/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
3
4#include <linux/interrupt.h>
5#include <linux/io.h>
6#include <linux/device.h>
7#include <linux/slab.h>
8
9#include <drm/lima_drm.h>
10
11#include "lima_device.h"
12#include "lima_pp.h"
13#include "lima_dlbu.h"
14#include "lima_bcast.h"
15#include "lima_vm.h"
16#include "lima_regs.h"
17
/*
 * MMIO accessors for a PP register block. Both expand to an access relative
 * to ip->iomem, so a `struct lima_ip *ip` must be in scope at the call site.
 * Arguments are parenthesized so that expression arguments (for example
 * `BASE | off`) cannot re-associate with the `ip->iomem +` addition.
 */
#define pp_write(reg, data) writel((data), ip->iomem + (reg))
#define pp_read(reg) readl(ip->iomem + (reg))
20
21static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
22{
23 struct lima_device *dev = ip->dev;
24 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
25
26 if (state & LIMA_PP_IRQ_MASK_ERROR) {
27 u32 status = pp_read(LIMA_PP_STATUS);
28
29 dev_err(dev->dev, "pp error irq state=%x status=%x\n",
30 state, status);
31
32 pipe->error = true;
33
34 /* mask all interrupts before hard reset */
35 pp_write(LIMA_PP_INT_MASK, 0);
36 }
37
38 pp_write(LIMA_PP_INT_CLEAR, state);
39}
40
41static irqreturn_t lima_pp_irq_handler(int irq, void *data)
42{
43 struct lima_ip *ip = data;
44 struct lima_device *dev = ip->dev;
45 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
46 u32 state = pp_read(LIMA_PP_INT_STATUS);
47
48 /* for shared irq case */
49 if (!state)
50 return IRQ_NONE;
51
52 lima_pp_handle_irq(ip, state);
53
54 if (atomic_dec_and_test(&pipe->task))
55 lima_sched_pipe_task_done(pipe);
56
57 return IRQ_HANDLED;
58}
59
60static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
61{
62 int i;
63 irqreturn_t ret = IRQ_NONE;
64 struct lima_ip *pp_bcast = data;
65 struct lima_device *dev = pp_bcast->dev;
66 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
67 struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
68
69 for (i = 0; i < frame->num_pp; i++) {
70 struct lima_ip *ip = pipe->processor[i];
71 u32 status, state;
72
73 if (pipe->done & (1 << i))
74 continue;
75
76 /* status read first in case int state change in the middle
77 * which may miss the interrupt handling
78 */
79 status = pp_read(LIMA_PP_STATUS);
80 state = pp_read(LIMA_PP_INT_STATUS);
81
82 if (state) {
83 lima_pp_handle_irq(ip, state);
84 ret = IRQ_HANDLED;
85 } else {
86 if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
87 continue;
88 }
89
90 pipe->done |= (1 << i);
91 if (atomic_dec_and_test(&pipe->task))
92 lima_sched_pipe_task_done(pipe);
93 }
94
95 return ret;
96}
97
98static void lima_pp_soft_reset_async(struct lima_ip *ip)
99{
100 if (ip->data.async_reset)
101 return;
102
103 pp_write(LIMA_PP_INT_MASK, 0);
104 pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
105 pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
106 ip->data.async_reset = true;
107}
108
109static int lima_pp_soft_reset_poll(struct lima_ip *ip)
110{
111 return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
112 pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
113}
114
115static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
116{
117 struct lima_device *dev = ip->dev;
118 int ret;
119
120 ret = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
121 if (ret) {
122 dev_err(dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
123 return ret;
124 }
125
126 pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
127 pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
128 return 0;
129}
130
131static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
132{
133 int i, err = 0;
134
135 if (!ip->data.async_reset)
136 return 0;
137
138 if (ip->id == lima_ip_pp_bcast) {
139 struct lima_device *dev = ip->dev;
140 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
141 struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
142
143 for (i = 0; i < frame->num_pp; i++)
144 err |= lima_pp_soft_reset_async_wait_one(pipe->processor[i]);
145 } else
146 err = lima_pp_soft_reset_async_wait_one(ip);
147
148 ip->data.async_reset = false;
149 return err;
150}
151
152static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
153{
154 int i, j, n = 0;
155
156 for (i = 0; i < LIMA_PP_FRAME_REG_NUM; i++)
157 writel(frame[i], ip->iomem + LIMA_PP_FRAME + i * 4);
158
159 for (i = 0; i < 3; i++) {
160 for (j = 0; j < LIMA_PP_WB_REG_NUM; j++)
161 writel(wb[n++], ip->iomem + LIMA_PP_WB(i) + j * 4);
162 }
163}
164
165static int lima_pp_hard_reset_poll(struct lima_ip *ip)
166{
167 pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
168 return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
169}
170
171static int lima_pp_hard_reset(struct lima_ip *ip)
172{
173 struct lima_device *dev = ip->dev;
174 int ret;
175
176 pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
177 pp_write(LIMA_PP_INT_MASK, 0);
178 pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
179 ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
180 if (ret) {
181 dev_err(dev->dev, "pp hard reset timeout\n");
182 return ret;
183 }
184
185 pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
186 pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
187 pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
188 return 0;
189}
190
191static void lima_pp_print_version(struct lima_ip *ip)
192{
193 u32 version, major, minor;
194 char *name;
195
196 version = pp_read(LIMA_PP_VERSION);
197 major = (version >> 8) & 0xFF;
198 minor = version & 0xFF;
199 switch (version >> 16) {
200 case 0xC807:
201 name = "mali200";
202 break;
203 case 0xCE07:
204 name = "mali300";
205 break;
206 case 0xCD07:
207 name = "mali400";
208 break;
209 case 0xCF07:
210 name = "mali450";
211 break;
212 default:
213 name = "unknown";
214 break;
215 }
216 dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
217 lima_ip_name(ip), name, major, minor);
218}
219
220int lima_pp_init(struct lima_ip *ip)
221{
222 struct lima_device *dev = ip->dev;
223 int err;
224
225 lima_pp_print_version(ip);
226
227 ip->data.async_reset = false;
228 lima_pp_soft_reset_async(ip);
229 err = lima_pp_soft_reset_async_wait(ip);
230 if (err)
231 return err;
232
233 err = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
234 IRQF_SHARED, lima_ip_name(ip), ip);
235 if (err) {
236 dev_err(dev->dev, "pp %s fail to request irq\n",
237 lima_ip_name(ip));
238 return err;
239 }
240
241 dev->pp_version = pp_read(LIMA_PP_VERSION);
242
243 return 0;
244}
245
/*
 * Counterpart of lima_pp_init(). Intentionally empty: lima_pp_init()
 * requests its interrupt through devm_request_irq() and this function
 * performs no teardown of its own.
 */
void lima_pp_fini(struct lima_ip *ip)
{
}
250
251int lima_pp_bcast_init(struct lima_ip *ip)
252{
253 struct lima_device *dev = ip->dev;
254 int err;
255
256 err = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
257 IRQF_SHARED, lima_ip_name(ip), ip);
258 if (err) {
259 dev_err(dev->dev, "pp %s fail to request irq\n",
260 lima_ip_name(ip));
261 return err;
262 }
263
264 return 0;
265}
266
/*
 * Counterpart of lima_pp_bcast_init(). Intentionally empty:
 * lima_pp_bcast_init() requests its interrupt through devm_request_irq()
 * and this function performs no teardown of its own.
 */
void lima_pp_bcast_fini(struct lima_ip *ip)
{
}
271
272static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
273 struct lima_sched_task *task)
274{
275 u32 num_pp;
276
277 if (pipe->bcast_processor) {
278 struct drm_lima_m450_pp_frame *f = task->frame;
279
280 num_pp = f->num_pp;
281
282 if (f->_pad)
283 return -EINVAL;
284 } else {
285 struct drm_lima_m400_pp_frame *f = task->frame;
286
287 num_pp = f->num_pp;
288 }
289
290 if (num_pp == 0 || num_pp > pipe->num_processor)
291 return -EINVAL;
292
293 return 0;
294}
295
296static void lima_pp_task_run(struct lima_sched_pipe *pipe,
297 struct lima_sched_task *task)
298{
299 if (pipe->bcast_processor) {
300 struct drm_lima_m450_pp_frame *frame = task->frame;
301 struct lima_device *dev = pipe->bcast_processor->dev;
302 struct lima_ip *ip = pipe->bcast_processor;
303 int i;
304
305 pipe->done = 0;
306 atomic_set(&pipe->task, frame->num_pp);
307
308 if (frame->use_dlbu) {
309 lima_dlbu_enable(dev, frame->num_pp);
310
311 frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
312 lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
313 } else
314 lima_dlbu_disable(dev);
315
316 lima_bcast_enable(dev, frame->num_pp);
317
318 lima_pp_soft_reset_async_wait(ip);
319
320 lima_pp_write_frame(ip, frame->frame, frame->wb);
321
322 for (i = 0; i < frame->num_pp; i++) {
323 struct lima_ip *ip = pipe->processor[i];
324
325 pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
326 if (!frame->use_dlbu)
327 pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
328 }
329
330 pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
331 } else {
332 struct drm_lima_m400_pp_frame *frame = task->frame;
333 int i;
334
335 atomic_set(&pipe->task, frame->num_pp);
336
337 for (i = 0; i < frame->num_pp; i++) {
338 struct lima_ip *ip = pipe->processor[i];
339
340 frame->frame[LIMA_PP_FRAME >> 2] =
341 frame->plbu_array_address[i];
342 frame->frame[LIMA_PP_STACK >> 2] =
343 frame->fragment_stack_address[i];
344
345 lima_pp_soft_reset_async_wait(ip);
346
347 lima_pp_write_frame(ip, frame->frame, frame->wb);
348
349 pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
350 }
351 }
352}
353
354static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
355{
356 if (pipe->bcast_processor)
357 lima_pp_soft_reset_async(pipe->bcast_processor);
358 else {
359 int i;
360
361 for (i = 0; i < pipe->num_processor; i++)
362 lima_pp_soft_reset_async(pipe->processor[i]);
363 }
364}
365
366static void lima_pp_task_error(struct lima_sched_pipe *pipe)
367{
368 int i;
369
370 for (i = 0; i < pipe->num_processor; i++) {
371 struct lima_ip *ip = pipe->processor[i];
372
373 dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
374 i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
375
376 lima_pp_hard_reset(ip);
377 }
378}
379
380static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
381{
382 if (atomic_dec_and_test(&pipe->task))
383 lima_sched_pipe_task_done(pipe);
384}
385
386static struct kmem_cache *lima_pp_task_slab;
387static int lima_pp_task_slab_refcnt;
388
389int lima_pp_pipe_init(struct lima_device *dev)
390{
391 int frame_size;
392 struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
393
394 if (dev->id == lima_gpu_mali400)
395 frame_size = sizeof(struct drm_lima_m400_pp_frame);
396 else
397 frame_size = sizeof(struct drm_lima_m450_pp_frame);
398
399 if (!lima_pp_task_slab) {
400 lima_pp_task_slab = kmem_cache_create_usercopy(
401 "lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
402 0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
403 frame_size, NULL);
404 if (!lima_pp_task_slab)
405 return -ENOMEM;
406 }
407 lima_pp_task_slab_refcnt++;
408
409 pipe->frame_size = frame_size;
410 pipe->task_slab = lima_pp_task_slab;
411
412 pipe->task_validate = lima_pp_task_validate;
413 pipe->task_run = lima_pp_task_run;
414 pipe->task_fini = lima_pp_task_fini;
415 pipe->task_error = lima_pp_task_error;
416 pipe->task_mmu_error = lima_pp_task_mmu_error;
417
418 return 0;
419}
420
421void lima_pp_pipe_fini(struct lima_device *dev)
422{
423 if (!--lima_pp_task_slab_refcnt) {
424 kmem_cache_destroy(lima_pp_task_slab);
425 lima_pp_task_slab = NULL;
426 }
427}