author    Qiang Yu <yuq825@gmail.com>    2019-03-09 07:20:12 -0500
committer Eric Anholt <eric@anholt.net>  2019-04-01 13:45:20 -0400
commit    a1d2a6339961efc078208dc3b2f006e9e9a8e119 (patch)
tree      afee34e42027af51de17fb915ce7cde89c2213ec /drivers/gpu/drm/lima/lima_sched.c
parent    6234fc0fb03743536eefba47c08ff8d4c9cf2fae (diff)
drm/lima: driver for ARM Mali4xx GPUs
- Mali 4xx GPUs have two kinds of processors, GP and PP. GP is for OpenGL
  vertex shader processing and PP is for fragment shader processing. Each
  processor has its own MMU, so the processors work in virtual address
  spaces.
- There is only one GP but multiple PPs (max 4 for Mali 400 and 8 for
  Mali 450) in the same Mali 4xx GPU. All PPs are grouped together to
  handle a single fragment shader task, divided by FB output tiled pixels.
  The Mali 400 user space driver is responsible for assigning target tiled
  pixels to each PP, while Mali 450 has a HW module called DLBU to
  dynamically balance each PP's load.
- The user space driver allocates buffer objects and maps them into the
  GPU virtual address space, uploads the command stream and draw data
  through a CPU mmap of the buffer object, then submits a task to GP/PP
  with a register frame that indicates where the command stream is and
  carries misc settings (see the caller-side sketch below).
- There is no command stream validation/relocation, because each user
  process has its own GPU virtual address space. The GP/PP MMU switches
  the virtual address space before running two tasks from different user
  processes. Erroneous or malicious user space code just gets an MMU
  fault or a GP/PP error IRQ, after which the HW/SW is recovered.
- Use GEM+shmem for MM. Currently memory is allocated and pinned at GEM
  object creation, and the GPU VM mapping of the buffer is also done at
  that allocation stage in kernel space. As an improvement we may delay
  the memory allocation and the real GPU VM mapping to the command
  submission stage in the future.
- Use drm_sched for GPU task scheduling. Each OpenGL context should have
  a lima context object in the kernel to distinguish tasks from different
  users. drm_sched picks tasks from each lima context in a fair way.

mesa driver can be found here before upstreamed:
https://gitlab.freedesktop.org/lima/mesa

v8:
- add comments for in_sync
- fix ctx free miss mutex unlock

v7:
- remove lima_fence_ops with default value
- move fence slab create to device probe
- check pad ioctl args to be zero
- add comments for user/kernel interface

v6:
- fix comments by checkpatch.pl

v5:
- export gp/pp version to userspace
- rebase on drm-misc-next

v4:
- use get param interface to get info
- separate context create/free ioctl
- remove unused max sched task param
- update copyright time
- use xarray instead of idr
- stop using drmP.h

v3:
- fix comments from kbuild robot
- restrict supported arch to tested ones

v2:
- fix syscall argument check
- fix job finish fence leak since kernel 5.0
- use drm syncobj to replace native fence
- move buffer object GPU va map into kernel
- reserve syscall argument space for future info
- remove kernel gem modifier
- switch TTM back to GEM+shmem MM
- use time based io poll
- use whole register name
- adopt gem reservation obj integration
- use drm_timeout_abs_to_jiffies

Cc: Eric Anholt <eric@anholt.net>
Cc: Rob Herring <robh@kernel.org>
Cc: Christian König <ckoenig.leichtzumerken@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Simon Shields <simon@lineageos.org>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/291200/
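The scheduler entry points added by this file are driven by the submit ioctl
path elsewhere in the series. The following is a minimal caller-side sketch of
how they fit together; lima_submit_sketch(), its arguments, and the error
handling are illustrative assumptions, not the actual lima_gem.c code. Only
the lima_sched_*() helpers, pipe->task_slab, and the dma-fence calls come from
this patch, and the lima headers are assumed to be in scope.

/* hypothetical caller sketch, not part of the patch */
static int lima_submit_sketch(struct lima_sched_context *ctx,
			      struct lima_sched_pipe *pipe,
			      struct lima_bo **bos, int num_bos,
			      struct lima_vm *vm,
			      struct dma_fence *in_fence)
{
	struct lima_sched_task *task;
	struct dma_fence *done;
	int err;

	/* task memory comes from the pipe's task slab */
	task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
	if (!task)
		return -ENOMEM;

	/* takes a reference on each BO and on the VM, sets up the drm_sched job */
	err = lima_sched_task_init(task, ctx, bos, num_bos, vm);
	if (err) {
		kmem_cache_free(pipe->task_slab, task);
		return err;
	}

	/* optional wait fence, e.g. from an in-sync syncobj; on success the
	 * task owns the reference, on failure it stays with the caller
	 */
	if (in_fence) {
		err = lima_sched_task_add_dep(task, in_fence);
		if (err) {
			lima_sched_task_fini(task);
			kmem_cache_free(pipe->task_slab, task);
			return err;
		}
	}

	/* hand the job to drm_sched; "done" is the finished fence */
	done = lima_sched_context_queue_task(ctx, task);
	dma_fence_put(done);
	return 0;
}

The real driver is expected to install the finished fence in the BOs'
reservation objects and in a user-visible syncobj instead of dropping it
immediately as the sketch does.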
Diffstat (limited to 'drivers/gpu/drm/lima/lima_sched.c')
-rw-r--r--  drivers/gpu/drm/lima/lima_sched.c  404
1 file changed, 404 insertions, 0 deletions
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..97bd9c1deb87
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,404 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/kthread.h>
#include <linux/slab.h>

#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_object.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;

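/*
 * All lima devices share a single fence slab; the refcount below lets
 * each caller of lima_sched_slab_init() pair it with a fini without
 * tearing the cache down while another device is still using it.
 */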
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};

static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}

int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->gem);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		kfree(task->bos);
		return err;
	}

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);
	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	int i;

	drm_sched_job_cleanup(&task->base);

	for (i = 0; i < task->num_dep; i++)
		dma_fence_put(task->dep[i]);

	kfree(task->dep);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put_unlocked(&task->bos[i]->gem);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

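/*
 * Track a fence this task must wait on before it can run. A fence from
 * the task's own scheduler context is dropped right away, and a later
 * fence from an already tracked context replaces the stored one. The
 * dep array grows on demand; on success the task owns (or has released)
 * the reference passed in.
 */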
int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
{
	int i, new_dep = 4;

	/* a fence from the same context is guaranteed to signal before this task */
	if (fence->context == task->base.s_fence->finished.context) {
		dma_fence_put(fence);
		return 0;
	}

	if (task->dep && task->num_dep == task->max_dep)
		new_dep = task->max_dep * 2;

	if (task->max_dep < new_dep) {
		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);

		if (!dep)
			return -ENOMEM;

		task->max_dep = new_dep;
		task->dep = dep;
	}

	for (i = 0; i < task->num_dep; i++) {
		if (task->dep[i]->context == fence->context &&
		    dma_fence_is_later(fence, task->dep[i])) {
			dma_fence_put(task->dep[i]);
			task->dep[i] = fence;
			return 0;
		}
	}

	task->dep[task->num_dep++] = fence;
	return 0;
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;

	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_fini(&context->base);
}

struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
						struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	drm_sched_entity_push_job(&task->base, &context->base);
	return fence;
}

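/*
 * drm_sched ->dependency callback: return the next dependency fence that
 * has not signaled yet (ownership passes to the scheduler) and drop the
 * ones that already have. Returning NULL means the task is ready to run.
 */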
static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
					       struct drm_sched_entity *entity)
{
	struct lima_sched_task *task = to_lima_task(job);
	int i;

	for (i = 0; i < task->num_dep; i++) {
		struct dma_fence *fence = task->dep[i];

		if (!task->dep[i])
			continue;

		task->dep[i] = NULL;

		if (!dma_fence_is_signaled(fence))
			return fence;

		dma_fence_put(fence);
	}

	return NULL;
}

static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_fence *fence;
	struct dma_fence *ret;
	struct lima_vm *vm = NULL, *last_vm = NULL;
	int i;

	/* after GPU reset */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;
	task->fence = &fence->base;

	/* take an extra reference for the caller, otherwise the irq
	 * handler may consume the fence before the caller gets to use it
	 */
	ret = dma_fence_get(task->fence);

	pipe->current_task = task;

	/* this is needed for the MMU to work correctly, otherwise GP/PP
	 * will hang or page fault for an unknown reason after running
	 * for a while.
	 *
	 * Need to investigate:
	 * 1. is it related to the TLB?
	 * 2. how much performance is affected by the L2 cache flush?
	 * 3. can we reduce the calls to this function, since all
	 *    GP/PP share the same L2 cache on mali400?
	 *
	 * TODO:
	 * 1. move this to task fini to save some wait time?
	 * 2. when GP/PP use different L2 caches, does PP need to wait
	 *    for the GP L2 cache flush?
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	if (task->vm != pipe->current_vm) {
		vm = lima_vm_get(task->vm);
		last_vm = pipe->current_vm;
		pipe->current_vm = task->vm;
	}

	if (pipe->bcast_mmu)
		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
	else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], vm);
	}

	if (last_vm)
		lima_vm_put(last_vm);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}

static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
					 struct lima_sched_task *task)
{
	drm_sched_stop(&pipe->base);

	if (task)
		drm_sched_increase_karma(&task->base);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu)
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	if (pipe->current_vm)
		lima_vm_put(pipe->current_vm);

	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);
}

static void lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);

	DRM_ERROR("lima job timeout\n");

	lima_sched_handle_error_task(pipe, task);
}

static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

const struct drm_sched_backend_ops lima_sched_ops = {
	.dependency = lima_sched_dependency,
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};

static void lima_sched_error_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, error_work);
	struct lima_sched_task *task = pipe->current_task;

	lima_sched_handle_error_task(pipe, task);
}

int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	long timeout;

	if (lima_sched_timeout_ms <= 0)
		timeout = MAX_SCHEDULE_TIMEOUT;
	else
		timeout = msecs_to_jiffies(lima_sched_timeout_ms);

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->error_work, lima_sched_error_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}

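/*
 * Completion entry point for the hardware backends (GP/PP interrupt
 * handling): queue the recovery work if the pipe flagged an error,
 * otherwise run the backend's task_fini() hook and signal the task's
 * hardware fence so drm_sched can finish and free the job.
 */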
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	if (pipe->error)
		schedule_work(&pipe->error_work);
	else {
		struct lima_sched_task *task = pipe->current_task;

		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);
	}
}