diff options
author | Gavin Wan <Gavin.Wan@amd.com> | 2017-06-23 13:55:15 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-07-14 11:05:52 -0400 |
commit | 890419409a3aba2ca7185a824e47d8ded8df11a2 (patch) | |
tree | 0b9575763284a062e87738446a7c77c44095fb98 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | 8e1b90cc44181405418071a13ead5892c3879239 (diff) |
drm/amdgpu: Support passing amdgpu critical error to host via GPU Mailbox.
This feature works in SRIOV environments. In non-SRIOV environments, the
trans_error function does nothing.
The error information includes error_code (16 bits), error_flags (16 bits)
and error_data (64 bits). Since there are not many errors, we keep the
errors in an array and transfer all of them to the host before the amdgpu
initialization function (amdgpu_device_init) exits.
Signed-off-by: Gavin Wan <Gavin.Wan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 21 |
1 files changed, 18 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ae4387fd2b65..88e45c6d36ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include "bif/bif_4_1_d.h" | 53 | #include "bif/bif_4_1_d.h" |
54 | #include <linux/pci.h> | 54 | #include <linux/pci.h> |
55 | #include <linux/firmware.h> | 55 | #include <linux/firmware.h> |
56 | #include "amdgpu_vf_error.h" | ||
56 | 57 | ||
57 | MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); | 58 | MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); |
58 | MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); | 59 | MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); |
@@ -2134,6 +2135,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2134 | r = amdgpu_atombios_init(adev); | 2135 | r = amdgpu_atombios_init(adev); |
2135 | if (r) { | 2136 | if (r) { |
2136 | dev_err(adev->dev, "amdgpu_atombios_init failed\n"); | 2137 | dev_err(adev->dev, "amdgpu_atombios_init failed\n"); |
2138 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); | ||
2137 | goto failed; | 2139 | goto failed; |
2138 | } | 2140 | } |
2139 | 2141 | ||
@@ -2144,6 +2146,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2144 | if (amdgpu_vpost_needed(adev)) { | 2146 | if (amdgpu_vpost_needed(adev)) { |
2145 | if (!adev->bios) { | 2147 | if (!adev->bios) { |
2146 | dev_err(adev->dev, "no vBIOS found\n"); | 2148 | dev_err(adev->dev, "no vBIOS found\n"); |
2149 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); | ||
2147 | r = -EINVAL; | 2150 | r = -EINVAL; |
2148 | goto failed; | 2151 | goto failed; |
2149 | } | 2152 | } |
@@ -2151,6 +2154,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2151 | r = amdgpu_atom_asic_init(adev->mode_info.atom_context); | 2154 | r = amdgpu_atom_asic_init(adev->mode_info.atom_context); |
2152 | if (r) { | 2155 | if (r) { |
2153 | dev_err(adev->dev, "gpu post error!\n"); | 2156 | dev_err(adev->dev, "gpu post error!\n"); |
2157 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); | ||
2154 | goto failed; | 2158 | goto failed; |
2155 | } | 2159 | } |
2156 | } else { | 2160 | } else { |
@@ -2162,7 +2166,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2162 | r = amdgpu_atombios_get_clock_info(adev); | 2166 | r = amdgpu_atombios_get_clock_info(adev); |
2163 | if (r) { | 2167 | if (r) { |
2164 | dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); | 2168 | dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); |
2165 | return r; | 2169 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); |
2170 | goto failed; | ||
2166 | } | 2171 | } |
2167 | /* init i2c buses */ | 2172 | /* init i2c buses */ |
2168 | amdgpu_atombios_i2c_init(adev); | 2173 | amdgpu_atombios_i2c_init(adev); |
@@ -2172,6 +2177,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2172 | r = amdgpu_fence_driver_init(adev); | 2177 | r = amdgpu_fence_driver_init(adev); |
2173 | if (r) { | 2178 | if (r) { |
2174 | dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); | 2179 | dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); |
2180 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); | ||
2175 | goto failed; | 2181 | goto failed; |
2176 | } | 2182 | } |
2177 | 2183 | ||
@@ -2181,6 +2187,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2181 | r = amdgpu_init(adev); | 2187 | r = amdgpu_init(adev); |
2182 | if (r) { | 2188 | if (r) { |
2183 | dev_err(adev->dev, "amdgpu_init failed\n"); | 2189 | dev_err(adev->dev, "amdgpu_init failed\n"); |
2190 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); | ||
2184 | amdgpu_fini(adev); | 2191 | amdgpu_fini(adev); |
2185 | goto failed; | 2192 | goto failed; |
2186 | } | 2193 | } |
@@ -2200,6 +2207,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2200 | r = amdgpu_ib_pool_init(adev); | 2207 | r = amdgpu_ib_pool_init(adev); |
2201 | if (r) { | 2208 | if (r) { |
2202 | dev_err(adev->dev, "IB initialization failed (%d).\n", r); | 2209 | dev_err(adev->dev, "IB initialization failed (%d).\n", r); |
2210 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); | ||
2203 | goto failed; | 2211 | goto failed; |
2204 | } | 2212 | } |
2205 | 2213 | ||
@@ -2244,12 +2252,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2244 | r = amdgpu_late_init(adev); | 2252 | r = amdgpu_late_init(adev); |
2245 | if (r) { | 2253 | if (r) { |
2246 | dev_err(adev->dev, "amdgpu_late_init failed\n"); | 2254 | dev_err(adev->dev, "amdgpu_late_init failed\n"); |
2255 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); | ||
2247 | goto failed; | 2256 | goto failed; |
2248 | } | 2257 | } |
2249 | 2258 | ||
2250 | return 0; | 2259 | return 0; |
2251 | 2260 | ||
2252 | failed: | 2261 | failed: |
2262 | amdgpu_vf_error_trans_all(adev); | ||
2253 | if (runtime) | 2263 | if (runtime) |
2254 | vga_switcheroo_fini_domain_pm_ops(adev->dev); | 2264 | vga_switcheroo_fini_domain_pm_ops(adev->dev); |
2255 | return r; | 2265 | return r; |
@@ -2937,6 +2947,7 @@ out: | |||
2937 | } | 2947 | } |
2938 | } else { | 2948 | } else { |
2939 | dev_err(adev->dev, "asic resume failed (%d).\n", r); | 2949 | dev_err(adev->dev, "asic resume failed (%d).\n", r); |
2950 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); | ||
2940 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 2951 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2941 | if (adev->rings[i] && adev->rings[i]->sched.thread) { | 2952 | if (adev->rings[i] && adev->rings[i]->sched.thread) { |
2942 | kthread_unpark(adev->rings[i]->sched.thread); | 2953 | kthread_unpark(adev->rings[i]->sched.thread); |
@@ -2947,12 +2958,16 @@ out: | |||
2947 | drm_helper_resume_force_mode(adev->ddev); | 2958 | drm_helper_resume_force_mode(adev->ddev); |
2948 | 2959 | ||
2949 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); | 2960 | ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); |
2950 | if (r) | 2961 | if (r) { |
2951 | /* bad news, how to tell it to userspace ? */ | 2962 | /* bad news, how to tell it to userspace ? */ |
2952 | dev_info(adev->dev, "GPU reset failed\n"); | 2963 | dev_info(adev->dev, "GPU reset failed\n"); |
2953 | else | 2964 | amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); |
2965 | } | ||
2966 | else { | ||
2954 | dev_info(adev->dev, "GPU reset successed!\n"); | 2967 | dev_info(adev->dev, "GPU reset successed!\n"); |
2968 | } | ||
2955 | 2969 | ||
2970 | amdgpu_vf_error_trans_all(adev); | ||
2956 | return r; | 2971 | return r; |
2957 | } | 2972 | } |
2958 | 2973 | ||