aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorGavin Wan <Gavin.Wan@amd.com>2017-06-23 13:55:15 -0400
committerAlex Deucher <alexander.deucher@amd.com>2017-07-14 11:05:52 -0400
commit890419409a3aba2ca7185a824e47d8ded8df11a2 (patch)
tree0b9575763284a062e87738446a7c77c44095fb98 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent8e1b90cc44181405418071a13ead5892c3879239 (diff)
drm/amdgpu: Support passing amdgpu critical error to host via GPU Mailbox.
This feature works for the SRIOV environment. For a non-SRIOV environment, the trans_error function does nothing. The error information includes error_code (16bit), error_flags (16bit) and error_data (64bit). Since there are not many errors, we keep the errors in an array and transfer all errors to the host before the amdgpu initialization function (amdgpu_device_init) exits. Signed-off-by: Gavin Wan <Gavin.Wan@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c21
1 files changed, 18 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ae4387fd2b65..88e45c6d36ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -53,6 +53,7 @@
53#include "bif/bif_4_1_d.h" 53#include "bif/bif_4_1_d.h"
54#include <linux/pci.h> 54#include <linux/pci.h>
55#include <linux/firmware.h> 55#include <linux/firmware.h>
56#include "amdgpu_vf_error.h"
56 57
57MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 58MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
58MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 59MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -2134,6 +2135,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2134 r = amdgpu_atombios_init(adev); 2135 r = amdgpu_atombios_init(adev);
2135 if (r) { 2136 if (r) {
2136 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2137 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2138 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2137 goto failed; 2139 goto failed;
2138 } 2140 }
2139 2141
@@ -2144,6 +2146,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2144 if (amdgpu_vpost_needed(adev)) { 2146 if (amdgpu_vpost_needed(adev)) {
2145 if (!adev->bios) { 2147 if (!adev->bios) {
2146 dev_err(adev->dev, "no vBIOS found\n"); 2148 dev_err(adev->dev, "no vBIOS found\n");
2149 amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2147 r = -EINVAL; 2150 r = -EINVAL;
2148 goto failed; 2151 goto failed;
2149 } 2152 }
@@ -2151,6 +2154,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2151 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2154 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2152 if (r) { 2155 if (r) {
2153 dev_err(adev->dev, "gpu post error!\n"); 2156 dev_err(adev->dev, "gpu post error!\n");
2157 amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
2154 goto failed; 2158 goto failed;
2155 } 2159 }
2156 } else { 2160 } else {
@@ -2162,7 +2166,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2162 r = amdgpu_atombios_get_clock_info(adev); 2166 r = amdgpu_atombios_get_clock_info(adev);
2163 if (r) { 2167 if (r) {
2164 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2168 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2165 return r; 2169 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2170 goto failed;
2166 } 2171 }
2167 /* init i2c buses */ 2172 /* init i2c buses */
2168 amdgpu_atombios_i2c_init(adev); 2173 amdgpu_atombios_i2c_init(adev);
@@ -2172,6 +2177,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2172 r = amdgpu_fence_driver_init(adev); 2177 r = amdgpu_fence_driver_init(adev);
2173 if (r) { 2178 if (r) {
2174 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2179 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2180 amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2175 goto failed; 2181 goto failed;
2176 } 2182 }
2177 2183
@@ -2181,6 +2187,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2181 r = amdgpu_init(adev); 2187 r = amdgpu_init(adev);
2182 if (r) { 2188 if (r) {
2183 dev_err(adev->dev, "amdgpu_init failed\n"); 2189 dev_err(adev->dev, "amdgpu_init failed\n");
2190 amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2184 amdgpu_fini(adev); 2191 amdgpu_fini(adev);
2185 goto failed; 2192 goto failed;
2186 } 2193 }
@@ -2200,6 +2207,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2200 r = amdgpu_ib_pool_init(adev); 2207 r = amdgpu_ib_pool_init(adev);
2201 if (r) { 2208 if (r) {
2202 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2209 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2210 amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2203 goto failed; 2211 goto failed;
2204 } 2212 }
2205 2213
@@ -2244,12 +2252,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2244 r = amdgpu_late_init(adev); 2252 r = amdgpu_late_init(adev);
2245 if (r) { 2253 if (r) {
2246 dev_err(adev->dev, "amdgpu_late_init failed\n"); 2254 dev_err(adev->dev, "amdgpu_late_init failed\n");
2255 amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2247 goto failed; 2256 goto failed;
2248 } 2257 }
2249 2258
2250 return 0; 2259 return 0;
2251 2260
2252failed: 2261failed:
2262 amdgpu_vf_error_trans_all(adev);
2253 if (runtime) 2263 if (runtime)
2254 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2264 vga_switcheroo_fini_domain_pm_ops(adev->dev);
2255 return r; 2265 return r;
@@ -2937,6 +2947,7 @@ out:
2937 } 2947 }
2938 } else { 2948 } else {
2939 dev_err(adev->dev, "asic resume failed (%d).\n", r); 2949 dev_err(adev->dev, "asic resume failed (%d).\n", r);
2950 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
2940 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2951 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2941 if (adev->rings[i] && adev->rings[i]->sched.thread) { 2952 if (adev->rings[i] && adev->rings[i]->sched.thread) {
2942 kthread_unpark(adev->rings[i]->sched.thread); 2953 kthread_unpark(adev->rings[i]->sched.thread);
@@ -2947,12 +2958,16 @@ out:
2947 drm_helper_resume_force_mode(adev->ddev); 2958 drm_helper_resume_force_mode(adev->ddev);
2948 2959
2949 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); 2960 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
2950 if (r) 2961 if (r) {
2951 /* bad news, how to tell it to userspace ? */ 2962 /* bad news, how to tell it to userspace ? */
2952 dev_info(adev->dev, "GPU reset failed\n"); 2963 dev_info(adev->dev, "GPU reset failed\n");
2953 else 2964 amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
2965 }
2966 else {
2954 dev_info(adev->dev, "GPU reset successed!\n"); 2967 dev_info(adev->dev, "GPU reset successed!\n");
2968 }
2955 2969
2970 amdgpu_vf_error_trans_all(adev);
2956 return r; 2971 return r;
2957} 2972}
2958 2973