diff options
author | Monk Liu <Monk.Liu@amd.com> | 2017-12-25 02:14:58 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2018-03-01 11:52:23 -0500 |
commit | 711826656bebb09b814349fac21cb13f88f92665 (patch) | |
tree | 1d0de6726037201ef12da1a861de954f310e91a2 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | d869ae092e39022c2bba81ea498abe74249a338c (diff) |
drm/amdgpu: stop all rings before doing gpu recover
found recover_vram_from_shadow sometimes gets executed
in parallel with the SDMA scheduler; should stop all
schedulers before doing gpu reset/recover
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 40 |
1 files changed, 15 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 41244858df64..64bd30075951 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -2648,22 +2648,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2648 | 2648 | ||
2649 | /* block TTM */ | 2649 | /* block TTM */ |
2650 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); | 2650 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); |
2651 | |||
2651 | /* store modesetting */ | 2652 | /* store modesetting */ |
2652 | if (amdgpu_device_has_dc_support(adev)) | 2653 | if (amdgpu_device_has_dc_support(adev)) |
2653 | state = drm_atomic_helper_suspend(adev->ddev); | 2654 | state = drm_atomic_helper_suspend(adev->ddev); |
2654 | 2655 | ||
2655 | /* block scheduler */ | 2656 | /* block all schedulers and reset given job's ring */ |
2656 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 2657 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2657 | struct amdgpu_ring *ring = adev->rings[i]; | 2658 | struct amdgpu_ring *ring = adev->rings[i]; |
2658 | 2659 | ||
2659 | if (!ring || !ring->sched.thread) | 2660 | if (!ring || !ring->sched.thread) |
2660 | continue; | 2661 | continue; |
2661 | 2662 | ||
2662 | /* only focus on the ring hit timeout if &job not NULL */ | 2663 | kthread_park(ring->sched.thread); |
2664 | |||
2663 | if (job && job->ring->idx != i) | 2665 | if (job && job->ring->idx != i) |
2664 | continue; | 2666 | continue; |
2665 | 2667 | ||
2666 | kthread_park(ring->sched.thread); | ||
2667 | drm_sched_hw_job_reset(&ring->sched, &job->base); | 2668 | drm_sched_hw_job_reset(&ring->sched, &job->base); |
2668 | 2669 | ||
2669 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 2670 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
@@ -2706,33 +2707,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
2706 | } | 2707 | } |
2707 | dma_fence_put(fence); | 2708 | dma_fence_put(fence); |
2708 | } | 2709 | } |
2710 | } | ||
2709 | 2711 | ||
2710 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 2712 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2711 | struct amdgpu_ring *ring = adev->rings[i]; | 2713 | struct amdgpu_ring *ring = adev->rings[i]; |
2712 | |||
2713 | if (!ring || !ring->sched.thread) | ||
2714 | continue; | ||
2715 | 2714 | ||
2716 | /* only focus on the ring hit timeout if &job not NULL */ | 2715 | if (!ring || !ring->sched.thread) |
2717 | if (job && job->ring->idx != i) | 2716 | continue; |
2718 | continue; | ||
2719 | 2717 | ||
2718 | /* only need recovery sched of the given job's ring | ||
2719 | * or all rings (in the case @job is NULL) | ||
2720 | * after above amdgpu_reset accomplished | ||
2721 | */ | ||
2722 | if ((!job || job->ring->idx == i) && !r) | ||
2720 | drm_sched_job_recovery(&ring->sched); | 2723 | drm_sched_job_recovery(&ring->sched); |
2721 | kthread_unpark(ring->sched.thread); | ||
2722 | } | ||
2723 | } else { | ||
2724 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | ||
2725 | struct amdgpu_ring *ring = adev->rings[i]; | ||
2726 | 2724 | ||
2727 | if (!ring || !ring->sched.thread) | 2725 | kthread_unpark(ring->sched.thread); |
2728 | continue; | ||
2729 | |||
2730 | /* only focus on the ring hit timeout if &job not NULL */ | ||
2731 | if (job && job->ring->idx != i) | ||
2732 | continue; | ||
2733 | |||
2734 | kthread_unpark(adev->rings[i]->sched.thread); | ||
2735 | } | ||
2736 | } | 2726 | } |
2737 | 2727 | ||
2738 | if (amdgpu_device_has_dc_support(adev)) { | 2728 | if (amdgpu_device_has_dc_support(adev)) { |