diff options
author | Monk Liu <Monk.Liu@amd.com> | 2017-10-16 07:46:43 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-12-04 16:33:10 -0500 |
commit | a8a51a70416baab813606c6014c5f0746958dfb2 (patch) | |
tree | 9198ac8b6d047072804fdd52f367bb4923f05f7e /drivers/gpu | |
parent | 7716ea564f6538236c34749cdf3a6351c2844c1a (diff) |
drm/amdgpu:cleanup job reset routine(v2)
merge the setting of the guilty flag on the context into this function
to avoid implementing an extra routine.
v2:
go through the entity list and compare the fence_ctx
before operating on the entity; otherwise the entity
may be just a wild pointer
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Chunming Zhou <David1.Zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 31 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 |
3 files changed, 33 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 620e3002001d..d5d450e380bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -2869,7 +2869,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) | |||
2869 | amd_sched_job_kickout(&job->base); | 2869 | amd_sched_job_kickout(&job->base); |
2870 | 2870 | ||
2871 | /* only do job_reset on the hang ring if @job not NULL */ | 2871 | /* only do job_reset on the hang ring if @job not NULL */ |
2872 | amd_sched_hw_job_reset(&ring->sched); | 2872 | amd_sched_hw_job_reset(&ring->sched, NULL); |
2873 | 2873 | ||
2874 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 2874 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
2875 | amdgpu_fence_driver_force_completion(ring); | 2875 | amdgpu_fence_driver_force_completion(ring); |
@@ -2990,7 +2990,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) | |||
2990 | if (!ring || !ring->sched.thread) | 2990 | if (!ring || !ring->sched.thread) |
2991 | continue; | 2991 | continue; |
2992 | kthread_park(ring->sched.thread); | 2992 | kthread_park(ring->sched.thread); |
2993 | amd_sched_hw_job_reset(&ring->sched); | 2993 | amd_sched_hw_job_reset(&ring->sched, NULL); |
2994 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 2994 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
2995 | amdgpu_fence_driver_force_completion(ring); | 2995 | amdgpu_fence_driver_force_completion(ring); |
2996 | } | 2996 | } |
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 764606ce3541..1474866d9048 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | |||
@@ -443,9 +443,18 @@ static void amd_sched_job_timedout(struct work_struct *work) | |||
443 | job->sched->ops->timedout_job(job); | 443 | job->sched->ops->timedout_job(job); |
444 | } | 444 | } |
445 | 445 | ||
446 | void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) | 446 | static void amd_sched_set_guilty(struct amd_sched_job *s_job) |
447 | { | ||
448 | if (atomic_inc_return(&s_job->karma) > s_job->sched->hang_limit) | ||
449 | if (s_job->s_entity->guilty) | ||
450 | atomic_set(s_job->s_entity->guilty, 1); | ||
451 | } | ||
452 | |||
453 | void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad) | ||
447 | { | 454 | { |
448 | struct amd_sched_job *s_job; | 455 | struct amd_sched_job *s_job; |
456 | struct amd_sched_entity *entity, *tmp; | ||
457 | int i;; | ||
449 | 458 | ||
450 | spin_lock(&sched->job_list_lock); | 459 | spin_lock(&sched->job_list_lock); |
451 | list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { | 460 | list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { |
@@ -458,6 +467,26 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) | |||
458 | } | 467 | } |
459 | } | 468 | } |
460 | spin_unlock(&sched->job_list_lock); | 469 | spin_unlock(&sched->job_list_lock); |
470 | |||
471 | if (bad) { | ||
472 | bool found = false; | ||
473 | |||
474 | for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++ ) { | ||
475 | struct amd_sched_rq *rq = &sched->sched_rq[i]; | ||
476 | |||
477 | spin_lock(&rq->lock); | ||
478 | list_for_each_entry_safe(entity, tmp, &rq->entities, list) { | ||
479 | if (bad->s_fence->scheduled.context == entity->fence_context) { | ||
480 | found = true; | ||
481 | amd_sched_set_guilty(bad); | ||
482 | break; | ||
483 | } | ||
484 | } | ||
485 | spin_unlock(&rq->lock); | ||
486 | if (found) | ||
487 | break; | ||
488 | } | ||
489 | } | ||
461 | } | 490 | } |
462 | 491 | ||
463 | void amd_sched_job_kickout(struct amd_sched_job *s_job) | 492 | void amd_sched_job_kickout(struct amd_sched_job *s_job) |
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index a05994c60b34..be75172587da 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | |||
@@ -174,7 +174,7 @@ int amd_sched_job_init(struct amd_sched_job *job, | |||
174 | struct amd_gpu_scheduler *sched, | 174 | struct amd_gpu_scheduler *sched, |
175 | struct amd_sched_entity *entity, | 175 | struct amd_sched_entity *entity, |
176 | void *owner); | 176 | void *owner); |
177 | void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); | 177 | void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job); |
178 | void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); | 178 | void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); |
179 | bool amd_sched_dependency_optimized(struct dma_fence* fence, | 179 | bool amd_sched_dependency_optimized(struct dma_fence* fence, |
180 | struct amd_sched_entity *entity); | 180 | struct amd_sched_entity *entity); |