about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
authorMonk Liu <Monk.Liu@amd.com>2016-03-04 05:51:02 -0500
committerAlex Deucher <alexander.deucher@amd.com>2016-05-02 15:19:57 -0400
commit0de2479c953ae07fd11e7b1bc8d4fc831e6842bb (patch)
treeceb625203d34fe6920545e2e08839c7e3e535d03 /drivers
parentcccd9bce978a6644bae75304a05db4faa36faec7 (diff)
drm/amdgpu: rework TDR in scheduler (v2)
Add two callbacks to the scheduler to maintain jobs; they are invoked for job timeout calculations. Now TDR measures the time gap from when the job is processed by hw. v2: fix typo Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c16
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.c37
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.h7
-rw-r--r--drivers/gpu/drm/amd/scheduler/sched_fence.c1
6 files changed, 62 insertions, 1 deletion
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9bf72b24495c..ccb28468ece8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -754,6 +754,7 @@ void amdgpu_job_free(struct amdgpu_job *job);
754int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, 754int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
755 struct amd_sched_entity *entity, void *owner, 755 struct amd_sched_entity *entity, void *owner,
756 struct fence **f); 756 struct fence **f);
757void amdgpu_job_timeout_func(struct work_struct *work);
757 758
758struct amdgpu_ring { 759struct amdgpu_ring {
759 struct amdgpu_device *adev; 760 struct amdgpu_device *adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 23266b454aec..9025671d21c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -871,6 +871,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
871 871
872 r = amd_sched_job_init(&job->base, &ring->sched, 872 r = amd_sched_job_init(&job->base, &ring->sched,
873 &p->ctx->rings[ring->idx].entity, 873 &p->ctx->rings[ring->idx].entity,
874 amdgpu_job_timeout_func,
874 p->filp, &fence); 875 p->filp, &fence);
875 if (r) { 876 if (r) {
876 amdgpu_job_free(job); 877 amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 23468088a995..961cae4a1955 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -34,6 +34,15 @@ static void amdgpu_job_free_handler(struct work_struct *ws)
34 kfree(job); 34 kfree(job);
35} 35}
36 36
37void amdgpu_job_timeout_func(struct work_struct *work)
38{
39 struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
40 DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
41 job->base.sched->name,
42 (uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
43 job->ring->fence_drv.sync_seq);
44}
45
37int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 46int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
38 struct amdgpu_job **job) 47 struct amdgpu_job **job)
39{ 48{
@@ -103,7 +112,10 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
103 if (!f) 112 if (!f)
104 return -EINVAL; 113 return -EINVAL;
105 114
106 r = amd_sched_job_init(&job->base, &ring->sched, entity, owner, &fence); 115 r = amd_sched_job_init(&job->base, &ring->sched,
116 entity, owner,
117 amdgpu_job_timeout_func,
118 &fence);
107 if (r) 119 if (r)
108 return r; 120 return r;
109 121
@@ -180,4 +192,6 @@ err:
180struct amd_sched_backend_ops amdgpu_sched_ops = { 192struct amd_sched_backend_ops amdgpu_sched_ops = {
181 .dependency = amdgpu_job_dependency, 193 .dependency = amdgpu_job_dependency,
182 .run_job = amdgpu_job_run, 194 .run_job = amdgpu_job_run,
195 .begin_job = amd_sched_job_begin,
196 .finish_job = amd_sched_job_finish,
183}; 197};
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 9a9fffdc272b..b7e8071448c6 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -324,6 +324,40 @@ static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) {
324 schedule_work(&job->work_free_job); 324 schedule_work(&job->work_free_job);
325} 325}
326 326
327/* job_finish is called after hw fence signaled, and
328 * the job had already been deleted from ring_mirror_list
329 */
330void amd_sched_job_finish(struct amd_sched_job *s_job)
331{
332 struct amd_sched_job *next;
333 struct amd_gpu_scheduler *sched = s_job->sched;
334
335 if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
336 cancel_delayed_work(&s_job->work_tdr); /*TODO: how to deal the case that tdr is running */
337
338 /* queue TDR for next job */
339 next = list_first_entry_or_null(&sched->ring_mirror_list,
340 struct amd_sched_job, node);
341
342 if (next) {
343 INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback);
344 schedule_delayed_work(&next->work_tdr, sched->timeout);
345 }
346 }
347}
348
349void amd_sched_job_begin(struct amd_sched_job *s_job)
350{
351 struct amd_gpu_scheduler *sched = s_job->sched;
352
353 if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
354 list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job)
355 {
356 INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback);
357 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
358 }
359}
360
327/** 361/**
328 * Submit a job to the job queue 362 * Submit a job to the job queue
329 * 363 *
@@ -347,6 +381,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
347int amd_sched_job_init(struct amd_sched_job *job, 381int amd_sched_job_init(struct amd_sched_job *job,
348 struct amd_gpu_scheduler *sched, 382 struct amd_gpu_scheduler *sched,
349 struct amd_sched_entity *entity, 383 struct amd_sched_entity *entity,
384 void (*timeout_cb)(struct work_struct *work),
350 void *owner, struct fence **fence) 385 void *owner, struct fence **fence)
351{ 386{
352 INIT_LIST_HEAD(&job->node); 387 INIT_LIST_HEAD(&job->node);
@@ -357,6 +392,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
357 return -ENOMEM; 392 return -ENOMEM;
358 393
359 job->s_fence->s_job = job; 394 job->s_fence->s_job = job;
395 job->timeout_callback = timeout_cb;
360 396
361 if (fence) 397 if (fence)
362 *fence = &job->s_fence->base; 398 *fence = &job->s_fence->base;
@@ -415,6 +451,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
415 /* remove job from ring_mirror_list */ 451 /* remove job from ring_mirror_list */
416 spin_lock_irqsave(&sched->job_list_lock, flags); 452 spin_lock_irqsave(&sched->job_list_lock, flags);
417 list_del_init(&s_fence->s_job->node); 453 list_del_init(&s_fence->s_job->node);
454 sched->ops->finish_job(s_fence->s_job);
418 spin_unlock_irqrestore(&sched->job_list_lock, flags); 455 spin_unlock_irqrestore(&sched->job_list_lock, flags);
419 456
420 amd_sched_fence_signal(s_fence); 457 amd_sched_fence_signal(s_fence);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index b26148d24a3d..a5700aded5bf 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -85,6 +85,8 @@ struct amd_sched_job {
85 struct fence_cb cb_free_job; 85 struct fence_cb cb_free_job;
86 struct work_struct work_free_job; 86 struct work_struct work_free_job;
87 struct list_head node; 87 struct list_head node;
88 struct delayed_work work_tdr;
89 void (*timeout_callback) (struct work_struct *work);
88}; 90};
89 91
90extern const struct fence_ops amd_sched_fence_ops; 92extern const struct fence_ops amd_sched_fence_ops;
@@ -105,6 +107,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
105struct amd_sched_backend_ops { 107struct amd_sched_backend_ops {
106 struct fence *(*dependency)(struct amd_sched_job *sched_job); 108 struct fence *(*dependency)(struct amd_sched_job *sched_job);
107 struct fence *(*run_job)(struct amd_sched_job *sched_job); 109 struct fence *(*run_job)(struct amd_sched_job *sched_job);
110 void (*begin_job)(struct amd_sched_job *sched_job);
111 void (*finish_job)(struct amd_sched_job *sched_job);
108}; 112};
109 113
110enum amd_sched_priority { 114enum amd_sched_priority {
@@ -150,7 +154,10 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence);
150int amd_sched_job_init(struct amd_sched_job *job, 154int amd_sched_job_init(struct amd_sched_job *job,
151 struct amd_gpu_scheduler *sched, 155 struct amd_gpu_scheduler *sched,
152 struct amd_sched_entity *entity, 156 struct amd_sched_entity *entity,
157 void (*timeout_cb)(struct work_struct *work),
153 void *owner, struct fence **fence); 158 void *owner, struct fence **fence);
154void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched , 159void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
155 struct amd_sched_job *s_job); 160 struct amd_sched_job *s_job);
161void amd_sched_job_finish(struct amd_sched_job *s_job);
162void amd_sched_job_begin(struct amd_sched_job *s_job);
156#endif 163#endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 33ddd38185d5..2a732c490375 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -63,6 +63,7 @@ void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
63 unsigned long flags; 63 unsigned long flags;
64 spin_lock_irqsave(&sched->job_list_lock, flags); 64 spin_lock_irqsave(&sched->job_list_lock, flags);
65 list_add_tail(&s_job->node, &sched->ring_mirror_list); 65 list_add_tail(&s_job->node, &sched->ring_mirror_list);
66 sched->ops->begin_job(s_job);
66 spin_unlock_irqrestore(&sched->job_list_lock, flags); 67 spin_unlock_irqrestore(&sched->job_list_lock, flags);
67} 68}
68 69