diff options
author | Monk Liu <Monk.Liu@amd.com> | 2017-10-25 04:21:08 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-12-04 16:41:30 -0500 |
commit | 48f05f2955e4a3183b219d6dfdb1c28e17d03da7 (patch) | |
tree | a444ff5d0e61958d30b7105f999d8695e994581b /drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |
parent | 3a393cf96ab38c72565fda106a825302828b7e05 (diff) |
amd/scheduler: implement job skip feature (v3)
jobs are skipped under two cases
1) when the entity behind this job is marked guilty, the job
popped from this entity's queue will be dropped in the sched_main loop.
2) in job_recovery(), skip the scheduling job if its detected karma is
above the limit, and also skip other jobs sharing the
same fence context. this approach is used because job_recovery() cannot
access job->entity, since the entity may already be dead.
v2:
some logic fix
v3:
when an entity is detected as guilty, don't drop the job in the popping
stage; instead, set its fence error to -ECANCELED
in run_job(), skip the scheduling if either: 1) fence->error < 0,
or 2) a VRAM LOST occurred on this job.
this way we can unify the job skipping logic.
with this feature we can introduce new gpu recover feature.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_job.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index f60662e03761..0a90c768dbc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |||
@@ -180,7 +180,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, | |||
180 | 180 | ||
181 | static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | 181 | static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) |
182 | { | 182 | { |
183 | struct dma_fence *fence = NULL; | 183 | struct dma_fence *fence = NULL, *finished; |
184 | struct amdgpu_device *adev; | 184 | struct amdgpu_device *adev; |
185 | struct amdgpu_job *job; | 185 | struct amdgpu_job *job; |
186 | int r; | 186 | int r; |
@@ -190,15 +190,18 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | |||
190 | return NULL; | 190 | return NULL; |
191 | } | 191 | } |
192 | job = to_amdgpu_job(sched_job); | 192 | job = to_amdgpu_job(sched_job); |
193 | finished = &job->base.s_fence->finished; | ||
193 | adev = job->adev; | 194 | adev = job->adev; |
194 | 195 | ||
195 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); | 196 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); |
196 | 197 | ||
197 | trace_amdgpu_sched_run_job(job); | 198 | trace_amdgpu_sched_run_job(job); |
198 | /* skip ib schedule when vram is lost */ | 199 | |
199 | if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { | 200 | if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) |
200 | dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); | 201 | dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ |
201 | DRM_ERROR("Skip scheduling IBs!\n"); | 202 | |
203 | if (finished->error < 0) { | ||
204 | DRM_INFO("Skip scheduling IBs!\n"); | ||
202 | } else { | 205 | } else { |
203 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, | 206 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, |
204 | &fence); | 207 | &fence); |