diff options
author | Chunming Zhou <David1.Zhou@amd.com> | 2016-07-15 03:57:13 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2016-08-08 11:32:05 -0400 |
commit | 35d782feae7f0b817016315d8718a82c61968894 (patch) | |
tree | 0e96ddedc96c8be182678c9965f33e0b05d0af3c /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | 1057f20c2252c5eba17e4a9bbf74ea990411f46c (diff) |
drm/amdgpu: add amdgpu soft reset
Check GPU status first; if MC/VMC/DISPLAY hangs, directly trigger a full reset.
If an engine hangs, trigger an engine soft reset; if the soft reset fails,
fall back to a full reset.
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 104 |
1 files changed, 86 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b88620586c8e..2bd2b19d4666 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -1962,7 +1962,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) | |||
1962 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1962 | for (i = 0; i < adev->num_ip_blocks; i++) { |
1963 | if (!adev->ip_block_status[i].valid) | 1963 | if (!adev->ip_block_status[i].valid) |
1964 | continue; | 1964 | continue; |
1965 | if (adev->ip_blocks[i].funcs->pre_soft_reset) { | 1965 | if (adev->ip_block_status[i].hang && |
1966 | adev->ip_blocks[i].funcs->pre_soft_reset) { | ||
1966 | r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); | 1967 | r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); |
1967 | if (r) | 1968 | if (r) |
1968 | return r; | 1969 | return r; |
@@ -1972,6 +1973,58 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) | |||
1972 | return 0; | 1973 | return 0; |
1973 | } | 1974 | } |
1974 | 1975 | ||
1976 | static bool amdgpu_need_full_reset(struct amdgpu_device *adev) | ||
1977 | { | ||
1978 | if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || | ||
1979 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || | ||
1980 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || | ||
1981 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || | ||
1982 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || | ||
1983 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || | ||
1984 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || | ||
1985 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || | ||
1986 | adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { | ||
1987 | DRM_INFO("Some block need full reset!\n"); | ||
1988 | return true; | ||
1989 | } | ||
1990 | return false; | ||
1991 | } | ||
1992 | |||
1993 | static int amdgpu_soft_reset(struct amdgpu_device *adev) | ||
1994 | { | ||
1995 | int i, r = 0; | ||
1996 | |||
1997 | for (i = 0; i < adev->num_ip_blocks; i++) { | ||
1998 | if (!adev->ip_block_status[i].valid) | ||
1999 | continue; | ||
2000 | if (adev->ip_block_status[i].hang && | ||
2001 | adev->ip_blocks[i].funcs->soft_reset) { | ||
2002 | r = adev->ip_blocks[i].funcs->soft_reset(adev); | ||
2003 | if (r) | ||
2004 | return r; | ||
2005 | } | ||
2006 | } | ||
2007 | |||
2008 | return 0; | ||
2009 | } | ||
2010 | |||
2011 | static int amdgpu_post_soft_reset(struct amdgpu_device *adev) | ||
2012 | { | ||
2013 | int i, r = 0; | ||
2014 | |||
2015 | for (i = 0; i < adev->num_ip_blocks; i++) { | ||
2016 | if (!adev->ip_block_status[i].valid) | ||
2017 | continue; | ||
2018 | if (adev->ip_block_status[i].hang && | ||
2019 | adev->ip_blocks[i].funcs->post_soft_reset) | ||
2020 | r = adev->ip_blocks[i].funcs->post_soft_reset(adev); | ||
2021 | if (r) | ||
2022 | return r; | ||
2023 | } | ||
2024 | |||
2025 | return 0; | ||
2026 | } | ||
2027 | |||
1975 | /** | 2028 | /** |
1976 | * amdgpu_gpu_reset - reset the asic | 2029 | * amdgpu_gpu_reset - reset the asic |
1977 | * | 2030 | * |
@@ -1984,6 +2037,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) | |||
1984 | { | 2037 | { |
1985 | int i, r; | 2038 | int i, r; |
1986 | int resched; | 2039 | int resched; |
2040 | bool need_full_reset; | ||
1987 | 2041 | ||
1988 | if (!amdgpu_check_soft_reset(adev)) { | 2042 | if (!amdgpu_check_soft_reset(adev)) { |
1989 | DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); | 2043 | DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); |
@@ -2007,28 +2061,42 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) | |||
2007 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ | 2061 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
2008 | amdgpu_fence_driver_force_completion(adev); | 2062 | amdgpu_fence_driver_force_completion(adev); |
2009 | 2063 | ||
2010 | /* save scratch */ | 2064 | need_full_reset = amdgpu_need_full_reset(adev); |
2011 | amdgpu_atombios_scratch_regs_save(adev); | ||
2012 | r = amdgpu_suspend(adev); | ||
2013 | 2065 | ||
2014 | retry: | 2066 | if (!need_full_reset) { |
2015 | /* Disable fb access */ | 2067 | amdgpu_pre_soft_reset(adev); |
2016 | if (adev->mode_info.num_crtc) { | 2068 | r = amdgpu_soft_reset(adev); |
2017 | struct amdgpu_mode_mc_save save; | 2069 | amdgpu_post_soft_reset(adev); |
2018 | amdgpu_display_stop_mc_access(adev, &save); | 2070 | if (r || amdgpu_check_soft_reset(adev)) { |
2019 | amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); | 2071 | DRM_INFO("soft reset failed, will fallback to full reset!\n"); |
2072 | need_full_reset = true; | ||
2073 | } | ||
2020 | } | 2074 | } |
2021 | 2075 | ||
2022 | r = amdgpu_asic_reset(adev); | 2076 | if (need_full_reset) { |
2023 | /* post card */ | 2077 | /* save scratch */ |
2024 | amdgpu_atom_asic_init(adev->mode_info.atom_context); | 2078 | amdgpu_atombios_scratch_regs_save(adev); |
2079 | r = amdgpu_suspend(adev); | ||
2025 | 2080 | ||
2026 | if (!r) { | 2081 | retry: |
2027 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); | 2082 | /* Disable fb access */ |
2028 | r = amdgpu_resume(adev); | 2083 | if (adev->mode_info.num_crtc) { |
2084 | struct amdgpu_mode_mc_save save; | ||
2085 | amdgpu_display_stop_mc_access(adev, &save); | ||
2086 | amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); | ||
2087 | } | ||
2088 | |||
2089 | r = amdgpu_asic_reset(adev); | ||
2090 | /* post card */ | ||
2091 | amdgpu_atom_asic_init(adev->mode_info.atom_context); | ||
2092 | |||
2093 | if (!r) { | ||
2094 | dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); | ||
2095 | r = amdgpu_resume(adev); | ||
2096 | } | ||
2097 | /* restore scratch */ | ||
2098 | amdgpu_atombios_scratch_regs_restore(adev); | ||
2029 | } | 2099 | } |
2030 | /* restore scratch */ | ||
2031 | amdgpu_atombios_scratch_regs_restore(adev); | ||
2032 | if (!r) { | 2100 | if (!r) { |
2033 | r = amdgpu_ib_ring_tests(adev); | 2101 | r = amdgpu_ib_ring_tests(adev); |
2034 | if (r) { | 2102 | if (r) { |