diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c961e781430d..8f431740c424 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -3244,31 +3244,43 @@ error: | |||
3244 | } | 3244 | } |
3245 | 3245 | ||
3246 | /** | 3246 | /** |
3247 | * amdgpu_device_should_recover_gpu - check if we should try GPU recovery | ||
3248 | * | ||
3249 | * @adev: amdgpu device pointer | ||
3250 | * | ||
3251 | * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover | ||
3252 | * a hung GPU. | ||
3253 | */ | ||
3254 | bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) | ||
3255 | { | ||
3256 | if (!amdgpu_device_ip_check_soft_reset(adev)) { | ||
3257 | DRM_INFO("Timeout, but no hardware hang detected.\n"); | ||
3258 | return false; | ||
3259 | } | ||
3260 | |||
3261 | if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && | ||
3262 | !amdgpu_sriov_vf(adev))) { | ||
3263 | DRM_INFO("GPU recovery disabled.\n"); | ||
3264 | return false; | ||
3265 | } | ||
3266 | |||
3267 | return true; | ||
3268 | } | ||
3269 | |||
3270 | /** | ||
3247 | * amdgpu_device_gpu_recover - reset the asic and recover scheduler | 3271 | * amdgpu_device_gpu_recover - reset the asic and recover scheduler |
3248 | * | 3272 | * |
3249 | * @adev: amdgpu device pointer | 3273 | * @adev: amdgpu device pointer |
3250 | * @job: which job trigger hang | 3274 | * @job: which job trigger hang |
3251 | * @force: forces reset regardless of amdgpu_gpu_recovery | ||
3252 | * | 3275 | * |
3253 | * Attempt to reset the GPU if it has hung (all asics). | 3276 | * Attempt to reset the GPU if it has hung (all asics). |
3254 | * Returns 0 for success or an error on failure. | 3277 | * Returns 0 for success or an error on failure. |
3255 | */ | 3278 | */ |
3256 | int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | 3279 | int amdgpu_device_gpu_recover(struct amdgpu_device *adev, |
3257 | struct amdgpu_job *job, bool force) | 3280 | struct amdgpu_job *job) |
3258 | { | 3281 | { |
3259 | int i, r, resched; | 3282 | int i, r, resched; |
3260 | 3283 | ||
3261 | if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { | ||
3262 | DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); | ||
3263 | return 0; | ||
3264 | } | ||
3265 | |||
3266 | if (!force && (amdgpu_gpu_recovery == 0 || | ||
3267 | (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { | ||
3268 | DRM_INFO("GPU recovery disabled.\n"); | ||
3269 | return 0; | ||
3270 | } | ||
3271 | |||
3272 | dev_info(adev->dev, "GPU reset begin!\n"); | 3284 | dev_info(adev->dev, "GPU reset begin!\n"); |
3273 | 3285 | ||
3274 | mutex_lock(&adev->lock_reset); | 3286 | mutex_lock(&adev->lock_reset); |