aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorChunming Zhou <David1.Zhou@amd.com>2016-07-15 03:57:13 -0400
committerAlex Deucher <alexander.deucher@amd.com>2016-08-08 11:32:05 -0400
commit35d782feae7f0b817016315d8718a82c61968894 (patch)
tree0e96ddedc96c8be182678c9965f33e0b05d0af3c /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent1057f20c2252c5eba17e4a9bbf74ea990411f46c (diff)
drm/amdgpu: add amdgpu soft reset
Check GPU status first: if MC/VMC/DISPLAY hangs, directly trigger a full reset. If an engine hangs, trigger an engine soft reset; if the soft reset fails, fall back to a full reset. Signed-off-by: Chunming Zhou <David1.Zhou@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c104
1 files changed, 86 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b88620586c8e..2bd2b19d4666 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1962,7 +1962,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
1962 for (i = 0; i < adev->num_ip_blocks; i++) { 1962 for (i = 0; i < adev->num_ip_blocks; i++) {
1963 if (!adev->ip_block_status[i].valid) 1963 if (!adev->ip_block_status[i].valid)
1964 continue; 1964 continue;
1965 if (adev->ip_blocks[i].funcs->pre_soft_reset) { 1965 if (adev->ip_block_status[i].hang &&
1966 adev->ip_blocks[i].funcs->pre_soft_reset) {
1966 r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); 1967 r = adev->ip_blocks[i].funcs->pre_soft_reset(adev);
1967 if (r) 1968 if (r)
1968 return r; 1969 return r;
@@ -1972,6 +1973,58 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
1972 return 0; 1973 return 0;
1973} 1974}
1974 1975
1976static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
1977{
1978 if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang ||
1979 adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang ||
1980 adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang ||
1981 adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang ||
1982 adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang ||
1983 adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang ||
1984 adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang ||
1985 adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang ||
1986 adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) {
1987 DRM_INFO("Some block need full reset!\n");
1988 return true;
1989 }
1990 return false;
1991}
1992
1993static int amdgpu_soft_reset(struct amdgpu_device *adev)
1994{
1995 int i, r = 0;
1996
1997 for (i = 0; i < adev->num_ip_blocks; i++) {
1998 if (!adev->ip_block_status[i].valid)
1999 continue;
2000 if (adev->ip_block_status[i].hang &&
2001 adev->ip_blocks[i].funcs->soft_reset) {
2002 r = adev->ip_blocks[i].funcs->soft_reset(adev);
2003 if (r)
2004 return r;
2005 }
2006 }
2007
2008 return 0;
2009}
2010
2011static int amdgpu_post_soft_reset(struct amdgpu_device *adev)
2012{
2013 int i, r = 0;
2014
2015 for (i = 0; i < adev->num_ip_blocks; i++) {
2016 if (!adev->ip_block_status[i].valid)
2017 continue;
2018 if (adev->ip_block_status[i].hang &&
2019 adev->ip_blocks[i].funcs->post_soft_reset)
2020 r = adev->ip_blocks[i].funcs->post_soft_reset(adev);
2021 if (r)
2022 return r;
2023 }
2024
2025 return 0;
2026}
2027
1975/** 2028/**
1976 * amdgpu_gpu_reset - reset the asic 2029 * amdgpu_gpu_reset - reset the asic
1977 * 2030 *
@@ -1984,6 +2037,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
1984{ 2037{
1985 int i, r; 2038 int i, r;
1986 int resched; 2039 int resched;
2040 bool need_full_reset;
1987 2041
1988 if (!amdgpu_check_soft_reset(adev)) { 2042 if (!amdgpu_check_soft_reset(adev)) {
1989 DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); 2043 DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2007,28 +2061,42 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
2007 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 2061 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
2008 amdgpu_fence_driver_force_completion(adev); 2062 amdgpu_fence_driver_force_completion(adev);
2009 2063
2010 /* save scratch */ 2064 need_full_reset = amdgpu_need_full_reset(adev);
2011 amdgpu_atombios_scratch_regs_save(adev);
2012 r = amdgpu_suspend(adev);
2013 2065
2014retry: 2066 if (!need_full_reset) {
2015 /* Disable fb access */ 2067 amdgpu_pre_soft_reset(adev);
2016 if (adev->mode_info.num_crtc) { 2068 r = amdgpu_soft_reset(adev);
2017 struct amdgpu_mode_mc_save save; 2069 amdgpu_post_soft_reset(adev);
2018 amdgpu_display_stop_mc_access(adev, &save); 2070 if (r || amdgpu_check_soft_reset(adev)) {
2019 amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); 2071 DRM_INFO("soft reset failed, will fallback to full reset!\n");
2072 need_full_reset = true;
2073 }
2020 } 2074 }
2021 2075
2022 r = amdgpu_asic_reset(adev); 2076 if (need_full_reset) {
2023 /* post card */ 2077 /* save scratch */
2024 amdgpu_atom_asic_init(adev->mode_info.atom_context); 2078 amdgpu_atombios_scratch_regs_save(adev);
2079 r = amdgpu_suspend(adev);
2025 2080
2026 if (!r) { 2081retry:
2027 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); 2082 /* Disable fb access */
2028 r = amdgpu_resume(adev); 2083 if (adev->mode_info.num_crtc) {
2084 struct amdgpu_mode_mc_save save;
2085 amdgpu_display_stop_mc_access(adev, &save);
2086 amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
2087 }
2088
2089 r = amdgpu_asic_reset(adev);
2090 /* post card */
2091 amdgpu_atom_asic_init(adev->mode_info.atom_context);
2092
2093 if (!r) {
2094 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
2095 r = amdgpu_resume(adev);
2096 }
2097 /* restore scratch */
2098 amdgpu_atombios_scratch_regs_restore(adev);
2029 } 2099 }
2030 /* restore scratch */
2031 amdgpu_atombios_scratch_regs_restore(adev);
2032 if (!r) { 2100 if (!r) {
2033 r = amdgpu_ib_ring_tests(adev); 2101 r = amdgpu_ib_ring_tests(adev);
2034 if (r) { 2102 if (r) {