drm/lima: driver for ARM Mali4xx GPUs

- Mali 4xx GPUs have two kinds of processors, GP and PP. GP is for OpenGL vertex shader processing and PP is for fragment shader processing. Each processor has its own MMU, so processors work in virtual address space. - There's only one GP but multiple PPs (max 4 for mali 400 and 8 for mali 450) in the same mali 4xx GPU. All PPs are grouped together to handle a single fragment shader task divided by FB output tiled pixels. The Mali 400 user space driver is responsible for assigning target tiled pixels to each PP, but mali 450 has a HW module called DLBU to dynamically balance each PP's load. - The user space driver allocates buffer objects and maps them into GPU virtual address space, uploads the command stream and draw data through a CPU mmap of the buffer object, then submits a task to GP/PP with a register frame indicating where the command stream is, plus misc settings. - There's no command stream validation/relocation because each user process has its own GPU virtual address space. The GP/PP MMU switches virtual address space before running two tasks from different user processes. Erroneous or malicious user space code just gets an MMU fault or GP/PP error IRQ, then the HW/SW will be recovered. - Use GEM+shmem for MM. Currently memory is just allocated and pinned at gem object creation. GPU vm map of the buffer is also done in the alloc stage in kernel space. We may delay the memory allocation and real GPU vm map to the command submission stage in the future as an improvement. - Use drm_sched for GPU task scheduling. Each OpenGL context should have a lima context object in the kernel to distinguish tasks from different users. drm_sched gets tasks from each lima context in a fair way. 
mesa driver can be found here before upstreamed: https://gitlab.freedesktop.org/lima/mesa v8: - add comments for in_sync - fix ctx free miss mutex unlock v7: - remove lima_fence_ops with default value - move fence slab create to device probe - check pad ioctl args to be zero - add comments for user/kernel interface v6: - fix comments by checkpatch.pl v5: - export gp/pp version to userspace - rebase on drm-misc-next v4: - use get param interface to get info - separate context create/free ioctl - remove unused max sched task param - update copyright time - use xarray instead of idr - stop using drmP.h v3: - fix comments from kbuild robot - restrict supported arch to tested ones v2: - fix syscall argument check - fix job finish fence leak since kernel 5.0 - use drm syncobj to replace native fence - move buffer object GPU va map into kernel - reserve syscall argument space for future info - remove kernel gem modifier - switch TTM back to GEM+shmem MM - use time based io poll - use whole register name - adopt gem reservation obj integration - use drm_timeout_abs_to_jiffies Cc: Eric Anholt <eric@anholt.net> Cc: Rob Herring <robh@kernel.org> Cc: Christian König <ckoenig.leichtzumerken@gmail.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Rob Clark <robdclark@gmail.com> Cc: Dave Airlie <airlied@gmail.com> Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de> Signed-off-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Heiko Stuebner <heiko@sntech.de> Signed-off-by: Marek Vasut <marex@denx.de> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Simon Shields <simon@lineageos.org> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Rob Herring <robh@kerrnel.org> Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/291200/
author: Qiang Yu <yuq825@gmail.com> 2019-03-09 07:20:12 -0500
committer: Eric Anholt <eric@anholt.net> 2019-04-01 13:45:20 -0400
commit: a1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
tree: afee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_pp.c
parent: 6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)
1 files changed, 427 insertions, 0 deletions
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
new file mode 100644
index 000000000000..d29721e177bf
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <drm/lima_drm.h>
+#include "lima_device.h"
+#include "lima_pp.h"
+#include "lima_dlbu.h"
+#include "lima_bcast.h"
+#include "lima_vm.h"
+#include "lima_regs.h"
+/* Register accessors for one PP instance. Both expect a `struct lima_ip *ip`
+ * to be in scope at the point of use. Arguments are parenthesized so that
+ * compound expressions expand safely.
+ */
+#define pp_write(reg, data) writel((data), ip->iomem + (reg))
+#define pp_read(reg) readl(ip->iomem + (reg))
+static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
+{
+        struct lima_device *dev = ip->dev;
+        struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+        if (state & LIMA_PP_IRQ_MASK_ERROR) {
+                u32 status = pp_read(LIMA_PP_STATUS);
+                dev_err(dev->dev, "pp error irq state=%x status=%x\n",
+                        state, status);
+                pipe->error = true;
+                /* mask all interrupts before hard reset */
+                pp_write(LIMA_PP_INT_MASK, 0);
+        }
+        pp_write(LIMA_PP_INT_CLEAR, state);
+}
+/* Interrupt handler for a single PP (@data is its struct lima_ip).
+ * Returns IRQ_NONE when the PP reports no pending interrupt state, otherwise
+ * services it, drops one reference on the pipe's outstanding-PP counter and
+ * signals task completion when that counter reaches zero.
+ */
+static irqreturn_t lima_pp_irq_handler(int irq, void *data)
+{
+        struct lima_ip *ip = data;
+        struct lima_sched_pipe *pp_pipe = ip->dev->pipe + lima_pipe_pp;
+        u32 pending = pp_read(LIMA_PP_INT_STATUS);
+        /* for shared irq case */
+        if (pending) {
+                lima_pp_handle_irq(ip, pending);
+                if (atomic_dec_and_test(&pp_pipe->task))
+                        lima_sched_pipe_task_done(pp_pipe);
+                return IRQ_HANDLED;
+        }
+        return IRQ_NONE;
+}
+/* Interrupt handler for the PP broadcast unit (@data is its struct lima_ip).
+ * Scans the PPs taking part in the current task, services any that report
+ * interrupt state, and marks a PP done once it has either raised an interrupt
+ * or is no longer rendering. Each newly finished PP drops one reference on
+ * pipe->task; the task is completed when that count reaches zero.
+ * Returns IRQ_HANDLED if at least one PP had interrupt state, else IRQ_NONE.
+ */
+static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
+{
+        int i;
+        irqreturn_t ret = IRQ_NONE;
+        struct lima_ip *pp_bcast = data;
+        struct lima_device *dev = pp_bcast->dev;
+        struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+        struct drm_lima_m450_pp_frame *frame;
+        /* The line is requested with IRQF_SHARED, so this handler may run
+         * for an interrupt that is not ours while no PP task is set up.
+         * NOTE(review): assumes current_task is NULL when the pipe is idle;
+         * confirm against the scheduler code.
+         */
+        if (!pipe->current_task)
+                return IRQ_NONE;
+        frame = pipe->current_task->frame;
+        for (i = 0; i < frame->num_pp; i++) {
+                struct lima_ip *ip = pipe->processor[i];
+                u32 status, state;
+                /* Skip PPs already accounted for in this task. */
+                if (pipe->done & (1 << i))
+                        continue;
+                /* status read first in case int state change in the middle
+                 * which may miss the interrupt handling
+                 */
+                status = pp_read(LIMA_PP_STATUS);
+                state = pp_read(LIMA_PP_INT_STATUS);
+                if (state) {
+                        lima_pp_handle_irq(ip, state);
+                        ret = IRQ_HANDLED;
+                } else {
+                        /* No interrupt yet and still busy: check again later. */
+                        if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
+                                continue;
+                }
+                pipe->done |= (1 << i);
+                if (atomic_dec_and_test(&pipe->task))
+                        lima_sched_pipe_task_done(pipe);
+        }
+        return ret;
+}
+/* Kick off a soft reset of @ip without waiting for it to finish. Does nothing
+ * if a reset is already pending; completion is collected later by
+ * lima_pp_soft_reset_async_wait().
+ */
+static void lima_pp_soft_reset_async(struct lima_ip *ip)
+{
+        if (!ip->data.async_reset) {
+                /* Mask interrupts and write the raw status register before
+                 * issuing the reset command.
+                 */
+                pp_write(LIMA_PP_INT_MASK, 0);
+                pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
+                pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
+                ip->data.async_reset = true;
+        }
+}
+/* Poll predicate for soft reset: non-zero once the PP is no longer rendering
+ * and its raw interrupt status equals LIMA_PP_IRQ_RESET_COMPLETED.
+ */
+static int lima_pp_soft_reset_poll(struct lima_ip *ip)
+{
+        /* The raw status is only read when rendering has stopped. */
+        if (pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE)
+                return 0;
+        return pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
+}
+/* Wait for a previously started soft reset of one PP to complete, then clear
+ * all interrupt bits and re-enable the interrupts the driver uses.
+ * Returns 0 on success or the lima_poll_timeout() error on timeout.
+ */
+static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
+{
+        int rc = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
+        if (!rc) {
+                pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+                pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+                return 0;
+        }
+        dev_err(ip->dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
+        return rc;
+}
+/* Collect a pending asynchronous soft reset on @ip, if there is one.
+ * For the broadcast unit, every PP used by the current task's frame is waited
+ * on and the results are OR-ed together; otherwise only @ip is waited on.
+ * The pending flag is cleared regardless of the outcome.
+ * Returns 0 when nothing was pending or all waits succeeded.
+ */
+static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
+{
+        int err = 0;
+        if (!ip->data.async_reset)
+                return 0;
+        if (ip->id != lima_ip_pp_bcast) {
+                err = lima_pp_soft_reset_async_wait_one(ip);
+        } else {
+                struct lima_sched_pipe *pipe = ip->dev->pipe + lima_pipe_pp;
+                struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+                int n;
+                for (n = 0; n < frame->num_pp; n++)
+                        err |= lima_pp_soft_reset_async_wait_one(pipe->processor[n]);
+        }
+        ip->data.async_reset = false;
+        return err;
+}
+/* Copy a register frame into @ip: LIMA_PP_FRAME_REG_NUM words from @frame
+ * starting at LIMA_PP_FRAME, then three consecutive groups of
+ * LIMA_PP_WB_REG_NUM words from @wb into LIMA_PP_WB(0..2).
+ */
+static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
+{
+        u32 *wb_cursor = wb;
+        int reg, unit;
+        for (reg = 0; reg < LIMA_PP_FRAME_REG_NUM; reg++)
+                writel(frame[reg], ip->iomem + LIMA_PP_FRAME + reg * 4);
+        /* @wb is laid out as one contiguous run per write-back unit. */
+        for (unit = 0; unit < 3; unit++)
+                for (reg = 0; reg < LIMA_PP_WB_REG_NUM; reg++)
+                        writel(*wb_cursor++, ip->iomem + LIMA_PP_WB(unit) + reg * 4);
+}
+/* Poll predicate for hard reset: write a marker value to
+ * LIMA_PP_PERF_CNT_0_LIMIT and report whether it reads back unchanged.
+ */
+static int lima_pp_hard_reset_poll(struct lima_ip *ip)
+{
+        const u32 marker = 0xC01A0000;
+        pp_write(LIMA_PP_PERF_CNT_0_LIMIT, marker);
+        return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == marker;
+}
+/* Force-reset one PP and wait for it to respond to register writes again.
+ * On success the scratch register is zeroed, all interrupt bits are cleared
+ * and the used interrupts are unmasked. Returns 0 or the poll error.
+ */
+static int lima_pp_hard_reset(struct lima_ip *ip)
+{
+        int rc;
+        pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
+        pp_write(LIMA_PP_INT_MASK, 0);
+        pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
+        rc = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
+        if (!rc) {
+                pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
+                pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+                pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+                return 0;
+        }
+        dev_err(ip->dev->dev, "pp hard reset timeout\n");
+        return rc;
+}
+/* Read LIMA_PP_VERSION from @ip and log the product name together with the
+ * major/minor revision. The upper 16 bits select the product name, bits 15:8
+ * hold the major number and bits 7:0 the minor number; unrecognised product
+ * codes are reported as "unknown".
+ */
+static void lima_pp_print_version(struct lima_ip *ip)
+{
+        u32 version, major, minor;
+        const char *name;
+        version = pp_read(LIMA_PP_VERSION);
+        major = (version >> 8) & 0xFF;
+        minor = version & 0xFF;
+        switch (version >> 16) {
+        case 0xC807:
+                name = "mali200";
+                break;
+        case 0xCE07:
+                name = "mali300";
+                break;
+        case 0xCD07:
+                name = "mali400";
+                break;
+        case 0xCF07:
+                name = "mali450";
+                break;
+        default:
+                name = "unknown";
+                break;
+        }
+        /* major/minor are u32, so print them with an unsigned conversion. */
+        dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
+                 lima_ip_name(ip), name, major, minor);
+}
+/* Bring up one PP: log its version, soft-reset it synchronously, install its
+ * interrupt handler and record the raw version register in the device.
+ * Returns 0 on success, or the error from the reset wait or IRQ request.
+ */
+int lima_pp_init(struct lima_ip *ip)
+{
+        int rc;
+        lima_pp_print_version(ip);
+        /* Start from a known flag state so the reset below is really issued. */
+        ip->data.async_reset = false;
+        lima_pp_soft_reset_async(ip);
+        rc = lima_pp_soft_reset_async_wait(ip);
+        if (rc)
+                return rc;
+        rc = devm_request_irq(ip->dev->dev, ip->irq, lima_pp_irq_handler,
+                              IRQF_SHARED, lima_ip_name(ip), ip);
+        if (!rc) {
+                ip->dev->pp_version = pp_read(LIMA_PP_VERSION);
+                return 0;
+        }
+        dev_err(ip->dev->dev, "pp %s fail to request irq\n",
+                lima_ip_name(ip));
+        return rc;
+}
+/* Teardown counterpart of lima_pp_init(); currently has nothing to do. */
+void lima_pp_fini(struct lima_ip *ip)
+{
+}
+/* Set up the PP broadcast unit by installing its shared interrupt handler.
+ * Returns 0 on success or the devm_request_irq() error.
+ */
+int lima_pp_bcast_init(struct lima_ip *ip)
+{
+        int rc = devm_request_irq(ip->dev->dev, ip->irq,
+                                  lima_pp_bcast_irq_handler, IRQF_SHARED,
+                                  lima_ip_name(ip), ip);
+        if (!rc)
+                return 0;
+        dev_err(ip->dev->dev, "pp %s fail to request irq\n",
+                lima_ip_name(ip));
+        return rc;
+}
+/* Teardown counterpart of lima_pp_bcast_init(); currently has nothing to do. */
+void lima_pp_bcast_fini(struct lima_ip *ip)
+{
+}
+/* Sanity-check a user-supplied PP frame before it is scheduled.
+ * With a broadcast processor the frame is read as the m450 layout and its
+ * padding must be zero; otherwise it is read as the m400 layout. In both
+ * cases num_pp must be between 1 and the number of PPs in the pipe.
+ * Returns 0 if the frame is acceptable, -EINVAL otherwise.
+ */
+static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
+                                 struct lima_sched_task *task)
+{
+        u32 count;
+        if (!pipe->bcast_processor) {
+                struct drm_lima_m400_pp_frame *m400 = task->frame;
+                count = m400->num_pp;
+        } else {
+                struct drm_lima_m450_pp_frame *m450 = task->frame;
+                if (m450->_pad)
+                        return -EINVAL;
+                count = m450->num_pp;
+        }
+        if (!count || count > pipe->num_processor)
+                return -EINVAL;
+        return 0;
+}
+/* Program the PP hardware for @task and start rendering.
+ * Two paths exist: when the pipe has a broadcast processor the frame is
+ * treated as a drm_lima_m450_pp_frame and written once through the broadcast
+ * unit; otherwise it is a drm_lima_m400_pp_frame and each PP is programmed
+ * and started individually. In both paths pipe->task is set to the number of
+ * PPs involved; the interrupt handlers count it back down.
+ * NOTE(review): the return value of lima_pp_soft_reset_async_wait() is not
+ * checked here, so a reset timeout does not stop the task from being started.
+ */
+static void lima_pp_task_run(struct lima_sched_pipe *pipe,
+                             struct lima_sched_task *task)
+{
+        if (pipe->bcast_processor) {
+                struct drm_lima_m450_pp_frame *frame = task->frame;
+                struct lima_device *dev = pipe->bcast_processor->dev;
+                struct lima_ip *ip = pipe->bcast_processor;
+                int i;
+                /* No PP has finished yet; num_pp completions are expected. */
+                pipe->done = 0;
+                atomic_set(&pipe->task, frame->num_pp);
+                if (frame->use_dlbu) {
+                        lima_dlbu_enable(dev, frame->num_pp);
+                        /* With DLBU the frame address slot is replaced by the
+                         * reserved DLBU virtual address.
+                         */
+                        frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
+                        lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
+                } else
+                        lima_dlbu_disable(dev);
+                lima_bcast_enable(dev, frame->num_pp);
+                /* Collect the reset queued by lima_pp_task_fini(), if any. */
+                lima_pp_soft_reset_async_wait(ip);
+                /* Here ip is the broadcast unit. */
+                lima_pp_write_frame(ip, frame->frame, frame->wb);
+                for (i = 0; i < frame->num_pp; i++) {
+                        /* Deliberately shadows the outer ip so that pp_write()
+                         * targets this individual PP inside the loop.
+                         */
+                        struct lima_ip *ip = pipe->processor[i];
+                        pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
+                        if (!frame->use_dlbu)
+                                pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
+                }
+                /* Back to the outer ip: the start command goes to the
+                 * broadcast unit.
+                 */
+                pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+        } else {
+                struct drm_lima_m400_pp_frame *frame = task->frame;
+                int i;
+                atomic_set(&pipe->task, frame->num_pp);
+                for (i = 0; i < frame->num_pp; i++) {
+                        struct lima_ip *ip = pipe->processor[i];
+                        /* Patch the shared frame image with this PP's own
+                         * PLBU array and fragment stack addresses.
+                         */
+                        frame->frame[LIMA_PP_FRAME >> 2] =
+                                frame->plbu_array_address[i];
+                        frame->frame[LIMA_PP_STACK >> 2] =
+                                frame->fragment_stack_address[i];
+                        lima_pp_soft_reset_async_wait(ip);
+                        lima_pp_write_frame(ip, frame->frame, frame->wb);
+                        pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+                }
+        }
+}
+/* After a task ends, queue an asynchronous soft reset: on the broadcast
+ * processor when the pipe has one, otherwise on every PP in the pipe.
+ */
+static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
+{
+        int idx;
+        if (pipe->bcast_processor) {
+                lima_pp_soft_reset_async(pipe->bcast_processor);
+                return;
+        }
+        for (idx = 0; idx < pipe->num_processor; idx++)
+                lima_pp_soft_reset_async(pipe->processor[idx]);
+}
+static void lima_pp_task_error(struct lima_sched_pipe *pipe)
+{
+        int i;
+        for (i = 0; i < pipe->num_processor; i++) {
+                struct lima_ip *ip = pipe->processor[i];
+                dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
+                        i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
+                lima_pp_hard_reset(ip);
+        }
+}
+/* MMU fault hook for the PP pipe: drop one reference on the outstanding-PP
+ * counter and complete the task when it reaches zero.
+ */
+static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
+{
+        if (!atomic_dec_and_test(&pipe->task))
+                return;
+        lima_sched_pipe_task_done(pipe);
+}
+/* Slab cache for PP tasks, created on first use and shared by later callers;
+ * lima_pp_task_slab_refcnt counts how many pipe inits currently hold it.
+ */
+static struct kmem_cache *lima_pp_task_slab;
+static int lima_pp_task_slab_refcnt;
+/* Initialise the PP scheduler pipe of @dev: pick the frame layout size by GPU
+ * model, make sure the task slab exists, and install the PP task callbacks.
+ * Returns 0 on success or -ENOMEM if the slab cannot be created.
+ */
+int lima_pp_pipe_init(struct lima_device *dev)
+{
+        struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+        int frame_size = dev->id == lima_gpu_mali400 ?
+                sizeof(struct drm_lima_m400_pp_frame) :
+                sizeof(struct drm_lima_m450_pp_frame);
+        if (!lima_pp_task_slab) {
+                struct kmem_cache *cache;
+                /* Each object is a lima_sched_task followed by the frame;
+                 * only the frame part is whitelisted for user copies.
+                 */
+                cache = kmem_cache_create_usercopy(
+                        "lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
+                        0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
+                        frame_size, NULL);
+                if (!cache)
+                        return -ENOMEM;
+                lima_pp_task_slab = cache;
+        }
+        lima_pp_task_slab_refcnt++;
+        pipe->task_slab = lima_pp_task_slab;
+        pipe->frame_size = frame_size;
+        pipe->task_validate = lima_pp_task_validate;
+        pipe->task_run = lima_pp_task_run;
+        pipe->task_fini = lima_pp_task_fini;
+        pipe->task_error = lima_pp_task_error;
+        pipe->task_mmu_error = lima_pp_task_mmu_error;
+        return 0;
+}
+/* Release this device's hold on the shared PP task slab; the cache is
+ * destroyed and the pointer cleared once the last holder is gone.
+ */
+void lima_pp_pipe_fini(struct lima_device *dev)
+{
+        lima_pp_task_slab_refcnt--;
+        if (lima_pp_task_slab_refcnt)
+                return;
+        kmem_cache_destroy(lima_pp_task_slab);
+        lima_pp_task_slab = NULL;
+}
author	Qiang Yu <yuq825@gmail.com>	2019-03-09 07:20:12 -0500
committer	Eric Anholt <eric@anholt.net>	2019-04-01 13:45:20 -0400
commit	a1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
tree	afee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_pp.c
parent	6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)

diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c new file mode 100644 index 000000000000..d29721e177bf --- /dev/null +++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -0,0 +1,427 @@
	1	// SPDX-License-Identifier: GPL-2.0 OR MIT
	2	/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
	3
	4	#include <linux/interrupt.h>
	5	#include <linux/io.h>
	6	#include <linux/device.h>
	7	#include <linux/slab.h>
	8
	9	#include <drm/lima_drm.h>
	10
	11	#include "lima_device.h"
	12	#include "lima_pp.h"
	13	#include "lima_dlbu.h"
	14	#include "lima_bcast.h"
	15	#include "lima_vm.h"
	16	#include "lima_regs.h"
	17
	18	#define pp_write(reg, data) writel(data, ip->iomem + reg)
	19	#define pp_read(reg) readl(ip->iomem + reg)
	20
	21	static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
	22	{
	23	struct lima_device *dev = ip->dev;
	24	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
	25
	26	if (state & LIMA_PP_IRQ_MASK_ERROR) {
	27	u32 status = pp_read(LIMA_PP_STATUS);
	28
	29	dev_err(dev->dev, "pp error irq state=%x status=%x\n",
	30	state, status);
	31
	32	pipe->error = true;
	33
	34	/* mask all interrupts before hard reset */
	35	pp_write(LIMA_PP_INT_MASK, 0);
	36	}
	37
	38	pp_write(LIMA_PP_INT_CLEAR, state);
	39	}
	40
	41	static irqreturn_t lima_pp_irq_handler(int irq, void *data)
	42	{
	43	struct lima_ip *ip = data;
	44	struct lima_device *dev = ip->dev;
	45	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
	46	u32 state = pp_read(LIMA_PP_INT_STATUS);
	47
	48	/* for shared irq case */
	49	if (!state)
	50	return IRQ_NONE;
	51
	52	lima_pp_handle_irq(ip, state);
	53
	54	if (atomic_dec_and_test(&pipe->task))
	55	lima_sched_pipe_task_done(pipe);
	56
	57	return IRQ_HANDLED;
	58	}
	59
	60	static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
	61	{
	62	int i;
	63	irqreturn_t ret = IRQ_NONE;
	64	struct lima_ip *pp_bcast = data;
	65	struct lima_device *dev = pp_bcast->dev;
	66	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
	67	struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
	68
	69	for (i = 0; i < frame->num_pp; i++) {
	70	struct lima_ip *ip = pipe->processor[i];
	71	u32 status, state;
	72
	73	if (pipe->done & (1 << i))
	74	continue;
	75
	76	/* status read first in case int state change in the middle
	77	* which may miss the interrupt handling
	78	*/
	79	status = pp_read(LIMA_PP_STATUS);
	80	state = pp_read(LIMA_PP_INT_STATUS);
	81
	82	if (state) {
	83	lima_pp_handle_irq(ip, state);
	84	ret = IRQ_HANDLED;
	85	} else {
	86	if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
	87	continue;
	88	}
	89
	90	pipe->done |= (1 << i);
	91	if (atomic_dec_and_test(&pipe->task))
	92	lima_sched_pipe_task_done(pipe);
	93	}
	94
	95	return ret;
	96	}
	97
	98	static void lima_pp_soft_reset_async(struct lima_ip *ip)
	99	{
	100	if (ip->data.async_reset)
	101	return;
	102
	103	pp_write(LIMA_PP_INT_MASK, 0);
	104	pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
	105	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
	106	ip->data.async_reset = true;
	107	}
	108
	109	static int lima_pp_soft_reset_poll(struct lima_ip *ip)
	110	{
	111	return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
	112	pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
	113	}
	114
	115	static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
	116	{
	117	struct lima_device *dev = ip->dev;
	118	int ret;
	119
	120	ret = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
	121	if (ret) {
	122	dev_err(dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
	123	return ret;
	124	}
	125
	126	pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
	127	pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
	128	return 0;
	129	}
	130
	131	static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
	132	{
	133	int i, err = 0;
	134
	135	if (!ip->data.async_reset)
	136	return 0;
	137
	138	if (ip->id == lima_ip_pp_bcast) {
	139	struct lima_device *dev = ip->dev;
	140	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
	141	struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
	142
	143	for (i = 0; i < frame->num_pp; i++)
	144	err |= lima_pp_soft_reset_async_wait_one(pipe->processor[i]);
	145	} else
	146	err = lima_pp_soft_reset_async_wait_one(ip);
	147
	148	ip->data.async_reset = false;
	149	return err;
	150	}
	151
	152	static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
	153	{
	154	int i, j, n = 0;
	155
	156	for (i = 0; i < LIMA_PP_FRAME_REG_NUM; i++)
	157	writel(frame[i], ip->iomem + LIMA_PP_FRAME + i * 4);
	158
	159	for (i = 0; i < 3; i++) {
	160	for (j = 0; j < LIMA_PP_WB_REG_NUM; j++)
	161	writel(wb[n++], ip->iomem + LIMA_PP_WB(i) + j * 4);
	162	}
	163	}
	164
	165	static int lima_pp_hard_reset_poll(struct lima_ip *ip)
	166	{
	167	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
	168	return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
	169	}
	170
	171	static int lima_pp_hard_reset(struct lima_ip *ip)
	172	{
	173	struct lima_device *dev = ip->dev;
	174	int ret;
	175
	176	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
	177	pp_write(LIMA_PP_INT_MASK, 0);
	178	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
	179	ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
	180	if (ret) {
	181	dev_err(dev->dev, "pp hard reset timeout\n");
	182	return ret;
	183	}
	184
	185	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
	186	pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
	187	pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
	188	return 0;
	189	}
	190
	191	static void lima_pp_print_version(struct lima_ip *ip)
	192	{
	193	u32 version, major, minor;
	194	char *name;
	195
	196	version = pp_read(LIMA_PP_VERSION);
	197	major = (version >> 8) & 0xFF;
	198	minor = version & 0xFF;
	199	switch (version >> 16) {
	200	case 0xC807:
	201	name = "mali200";
	202	break;
	203	case 0xCE07:
	204	name = "mali300";
	205	break;
	206	case 0xCD07:
	207	name = "mali400";
	208	break;
	209	case 0xCF07:
	210	name = "mali450";
	211	break;
	212	default:
	213	name = "unknown";
	214	break;
	215	}
	216	dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
	217	lima_ip_name(ip), name, major, minor);
	218	}
	219
	220	int lima_pp_init(struct lima_ip *ip)
	221	{
	222	struct lima_device *dev = ip->dev;
	223	int err;
	224
	225	lima_pp_print_version(ip);
	226
	227	ip->data.async_reset = false;
	228	lima_pp_soft_reset_async(ip);
	229	err = lima_pp_soft_reset_async_wait(ip);
	230	if (err)
	231	return err;
	232
	233	err = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
	234	IRQF_SHARED, lima_ip_name(ip), ip);
	235	if (err) {
	236	dev_err(dev->dev, "pp %s fail to request irq\n",
	237	lima_ip_name(ip));
	238	return err;
	239	}
	240
	241	dev->pp_version = pp_read(LIMA_PP_VERSION);
	242
	243	return 0;
	244	}
	245
	246	void lima_pp_fini(struct lima_ip *ip)
	247	{
	248
	249	}
	250
	251	int lima_pp_bcast_init(struct lima_ip *ip)
	252	{
	253	struct lima_device *dev = ip->dev;
	254	int err;
	255
	256	err = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
	257	IRQF_SHARED, lima_ip_name(ip), ip);
	258	if (err) {
	259	dev_err(dev->dev, "pp %s fail to request irq\n",
	260	lima_ip_name(ip));
	261	return err;
	262	}
	263
	264	return 0;
	265	}
	266
	267	void lima_pp_bcast_fini(struct lima_ip *ip)
	268	{
	269
	270	}
	271
	272	static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
	273	struct lima_sched_task *task)
	274	{
	275	u32 num_pp;
	276
	277	if (pipe->bcast_processor) {
	278	struct drm_lima_m450_pp_frame *f = task->frame;
	279
	280	num_pp = f->num_pp;
	281
	282	if (f->_pad)
	283	return -EINVAL;
	284	} else {
	285	struct drm_lima_m400_pp_frame *f = task->frame;
	286
	287	num_pp = f->num_pp;
	288	}
	289
	290	if (num_pp == 0 || num_pp > pipe->num_processor)
	291	return -EINVAL;
	292
	293	return 0;
	294	}
	295
	296	static void lima_pp_task_run(struct lima_sched_pipe *pipe,
	297	struct lima_sched_task *task)
	298	{
	299	if (pipe->bcast_processor) {
	300	struct drm_lima_m450_pp_frame *frame = task->frame;
	301	struct lima_device *dev = pipe->bcast_processor->dev;
	302	struct lima_ip *ip = pipe->bcast_processor;
	303	int i;
	304
	305	pipe->done = 0;
	306	atomic_set(&pipe->task, frame->num_pp);
	307
	308	if (frame->use_dlbu) {
	309	lima_dlbu_enable(dev, frame->num_pp);
	310
	311	frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
	312	lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
	313	} else
	314	lima_dlbu_disable(dev);
	315
	316	lima_bcast_enable(dev, frame->num_pp);
	317
	318	lima_pp_soft_reset_async_wait(ip);
	319
	320	lima_pp_write_frame(ip, frame->frame, frame->wb);
	321
	322	for (i = 0; i < frame->num_pp; i++) {
	323	struct lima_ip *ip = pipe->processor[i];
	324
	325	pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
	326	if (!frame->use_dlbu)
	327	pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
	328	}
	329
	330	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
	331	} else {
	332	struct drm_lima_m400_pp_frame *frame = task->frame;
	333	int i;
	334
	335	atomic_set(&pipe->task, frame->num_pp);
	336
	337	for (i = 0; i < frame->num_pp; i++) {
	338	struct lima_ip *ip = pipe->processor[i];
	339
	340	frame->frame[LIMA_PP_FRAME >> 2] =
	341	frame->plbu_array_address[i];
	342	frame->frame[LIMA_PP_STACK >> 2] =
	343	frame->fragment_stack_address[i];
	344
	345	lima_pp_soft_reset_async_wait(ip);
	346
	347	lima_pp_write_frame(ip, frame->frame, frame->wb);
	348
	349	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
	350	}
	351	}
	352	}
	353
	354	static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
	355	{
	356	if (pipe->bcast_processor)
	357	lima_pp_soft_reset_async(pipe->bcast_processor);
	358	else {
	359	int i;
	360
	361	for (i = 0; i < pipe->num_processor; i++)
	362	lima_pp_soft_reset_async(pipe->processor[i]);
	363	}
	364	}
	365
	366	static void lima_pp_task_error(struct lima_sched_pipe *pipe)
	367	{
	368	int i;
	369
	370	for (i = 0; i < pipe->num_processor; i++) {
	371	struct lima_ip *ip = pipe->processor[i];
	372
	373	dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
	374	i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
	375
	376	lima_pp_hard_reset(ip);
	377	}
	378	}
	379
	380	static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
	381	{
	382	if (atomic_dec_and_test(&pipe->task))
	383	lima_sched_pipe_task_done(pipe);
	384	}
	385
	386	static struct kmem_cache *lima_pp_task_slab;
	387	static int lima_pp_task_slab_refcnt;
	388
	389	int lima_pp_pipe_init(struct lima_device *dev)
	390	{
	391	int frame_size;
	392	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
	393
	394	if (dev->id == lima_gpu_mali400)
	395	frame_size = sizeof(struct drm_lima_m400_pp_frame);
	396	else
	397	frame_size = sizeof(struct drm_lima_m450_pp_frame);
	398
	399	if (!lima_pp_task_slab) {
	400	lima_pp_task_slab = kmem_cache_create_usercopy(
	401	"lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
	402	0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
	403	frame_size, NULL);
	404	if (!lima_pp_task_slab)
	405	return -ENOMEM;
	406	}
	407	lima_pp_task_slab_refcnt++;
	408
	409	pipe->frame_size = frame_size;
	410	pipe->task_slab = lima_pp_task_slab;
	411
	412	pipe->task_validate = lima_pp_task_validate;
	413	pipe->task_run = lima_pp_task_run;
	414	pipe->task_fini = lima_pp_task_fini;
	415	pipe->task_error = lima_pp_task_error;
	416	pipe->task_mmu_error = lima_pp_task_mmu_error;
	417
	418	return 0;
	419	}
	420
	421	void lima_pp_pipe_fini(struct lima_device *dev)
	422	{
	423	if (!--lima_pp_task_slab_refcnt) {
	424	kmem_cache_destroy(lima_pp_task_slab);
	425	lima_pp_task_slab = NULL;
	426	}
	427	}