about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c 54
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 13
4 files changed, 54 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3e84ddf9e3b5..fc0c1cde69ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2040,6 +2040,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2040 mutex_init(&adev->srbm_mutex); 2040 mutex_init(&adev->srbm_mutex);
2041 mutex_init(&adev->grbm_idx_mutex); 2041 mutex_init(&adev->grbm_idx_mutex);
2042 mutex_init(&adev->mn_lock); 2042 mutex_init(&adev->mn_lock);
2043 mutex_init(&adev->virt.vf_errors.lock);
2043 hash_init(adev->mn_hash); 2044 hash_init(adev->mn_hash);
2044 2045
2045 amdgpu_check_arguments(adev); 2046 amdgpu_check_arguments(adev);
@@ -2125,7 +2126,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2125 r = amdgpu_atombios_init(adev); 2126 r = amdgpu_atombios_init(adev);
2126 if (r) { 2127 if (r) {
2127 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2128 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2128 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2129 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2129 goto failed; 2130 goto failed;
2130 } 2131 }
2131 2132
@@ -2136,7 +2137,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2136 if (amdgpu_vpost_needed(adev)) { 2137 if (amdgpu_vpost_needed(adev)) {
2137 if (!adev->bios) { 2138 if (!adev->bios) {
2138 dev_err(adev->dev, "no vBIOS found\n"); 2139 dev_err(adev->dev, "no vBIOS found\n");
2139 amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2140 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2140 r = -EINVAL; 2141 r = -EINVAL;
2141 goto failed; 2142 goto failed;
2142 } 2143 }
@@ -2144,7 +2145,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2144 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2145 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2145 if (r) { 2146 if (r) {
2146 dev_err(adev->dev, "gpu post error!\n"); 2147 dev_err(adev->dev, "gpu post error!\n");
2147 amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); 2148 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
2148 goto failed; 2149 goto failed;
2149 } 2150 }
2150 } else { 2151 } else {
@@ -2156,7 +2157,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2156 r = amdgpu_atomfirmware_get_clock_info(adev); 2157 r = amdgpu_atomfirmware_get_clock_info(adev);
2157 if (r) { 2158 if (r) {
2158 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2159 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
2159 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2160 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2160 goto failed; 2161 goto failed;
2161 } 2162 }
2162 } else { 2163 } else {
@@ -2164,7 +2165,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2164 r = amdgpu_atombios_get_clock_info(adev); 2165 r = amdgpu_atombios_get_clock_info(adev);
2165 if (r) { 2166 if (r) {
2166 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2167 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2167 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2168 goto failed; 2169 goto failed;
2169 } 2170 }
2170 /* init i2c buses */ 2171 /* init i2c buses */
@@ -2175,7 +2176,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2175 r = amdgpu_fence_driver_init(adev); 2176 r = amdgpu_fence_driver_init(adev);
2176 if (r) { 2177 if (r) {
2177 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2178 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2178 amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2179 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2179 goto failed; 2180 goto failed;
2180 } 2181 }
2181 2182
@@ -2185,7 +2186,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2185 r = amdgpu_init(adev); 2186 r = amdgpu_init(adev);
2186 if (r) { 2187 if (r) {
2187 dev_err(adev->dev, "amdgpu_init failed\n"); 2188 dev_err(adev->dev, "amdgpu_init failed\n");
2188 amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2189 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2189 amdgpu_fini(adev); 2190 amdgpu_fini(adev);
2190 goto failed; 2191 goto failed;
2191 } 2192 }
@@ -2205,7 +2206,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2205 r = amdgpu_ib_pool_init(adev); 2206 r = amdgpu_ib_pool_init(adev);
2206 if (r) { 2207 if (r) {
2207 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2208 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2208 amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2209 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2209 goto failed; 2210 goto failed;
2210 } 2211 }
2211 2212
@@ -2254,7 +2255,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2254 r = amdgpu_late_init(adev); 2255 r = amdgpu_late_init(adev);
2255 if (r) { 2256 if (r) {
2256 dev_err(adev->dev, "amdgpu_late_init failed\n"); 2257 dev_err(adev->dev, "amdgpu_late_init failed\n");
2257 amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2258 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2258 goto failed; 2259 goto failed;
2259 } 2260 }
2260 2261
@@ -2936,7 +2937,7 @@ out:
2936 } 2937 }
2937 } else { 2938 } else {
2938 dev_err(adev->dev, "asic resume failed (%d).\n", r); 2939 dev_err(adev->dev, "asic resume failed (%d).\n", r);
2939 amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); 2940 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
2940 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2941 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2941 if (adev->rings[i] && adev->rings[i]->sched.thread) { 2942 if (adev->rings[i] && adev->rings[i]->sched.thread) {
2942 kthread_unpark(adev->rings[i]->sched.thread); 2943 kthread_unpark(adev->rings[i]->sched.thread);
@@ -2950,7 +2951,7 @@ out:
2950 if (r) { 2951 if (r) {
2951 /* bad news, how to tell it to userspace ? */ 2952 /* bad news, how to tell it to userspace ? */
2952 dev_info(adev->dev, "GPU reset failed\n"); 2953 dev_info(adev->dev, "GPU reset failed\n");
2953 amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 2954 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
2954 } 2955 }
2955 else { 2956 else {
2956 dev_info(adev->dev, "GPU reset successed!\n"); 2957 dev_info(adev->dev, "GPU reset successed!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
index 45ac91861965..746b81339835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
@@ -25,30 +25,21 @@
25#include "amdgpu_vf_error.h" 25#include "amdgpu_vf_error.h"
26#include "mxgpu_ai.h" 26#include "mxgpu_ai.h"
27 27
28#define AMDGPU_VF_ERROR_ENTRY_SIZE 16 28void amdgpu_vf_error_put(struct amdgpu_device *adev,
29 29 uint16_t sub_error_code,
30/* struct error_entry - amdgpu VF error information. */ 30 uint16_t error_flags,
31struct amdgpu_vf_error_buffer { 31 uint64_t error_data)
32 int read_count;
33 int write_count;
34 uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
35 uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
36 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
37};
38
39struct amdgpu_vf_error_buffer admgpu_vf_errors;
40
41
42void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
43{ 32{
44 int index; 33 int index;
45 uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); 34 uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
46 35
47 index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE; 36 mutex_lock(&adev->virt.vf_errors.lock);
48 admgpu_vf_errors.code [index] = error_code; 37 index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
49 admgpu_vf_errors.flags [index] = error_flags; 38 adev->virt.vf_errors.code [index] = error_code;
50 admgpu_vf_errors.data [index] = error_data; 39 adev->virt.vf_errors.flags [index] = error_flags;
51 admgpu_vf_errors.write_count ++; 40 adev->virt.vf_errors.data [index] = error_data;
41 adev->virt.vf_errors.write_count ++;
42 mutex_unlock(&adev->virt.vf_errors.lock);
52} 43}
53 44
54 45
@@ -58,7 +49,8 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
58 u32 data1, data2, data3; 49 u32 data1, data2, data3;
59 int index; 50 int index;
60 51
61 if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) { 52 if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
53 (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
62 return; 54 return;
63 } 55 }
64/* 56/*
@@ -68,18 +60,22 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
68 return; 60 return;
69 } 61 }
70*/ 62*/
63
64 mutex_lock(&adev->virt.vf_errors.lock);
71 /* The errors are overlay of array, correct read_count as full. */ 65 /* The errors are overlay of array, correct read_count as full. */
72 if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) { 66 if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
73 admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE; 67 adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
74 } 68 }
75 69
76 while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) { 70 while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
77 index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE; 71 index =adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
78 data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]); 72 data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
79 data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF; 73 adev->virt.vf_errors.flags[index]);
80 data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF; 74 data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
75 data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
81 76
82 adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3); 77 adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
83 admgpu_vf_errors.read_count ++; 78 adev->virt.vf_errors.read_count ++;
84 } 79 }
80 mutex_unlock(&adev->virt.vf_errors.lock);
85} 81}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
index 2a3278ec76ba..6436bd053325 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
@@ -56,7 +56,10 @@ enum AMDGIM_ERROR_CATEGORY {
56 AMDGIM_ERROR_CATEGORY_MAX 56 AMDGIM_ERROR_CATEGORY_MAX
57}; 57};
58 58
59void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data); 59void amdgpu_vf_error_put(struct amdgpu_device *adev,
60 uint16_t sub_error_code,
61 uint16_t error_flags,
62 uint64_t error_data);
60void amdgpu_vf_error_trans_all (struct amdgpu_device *adev); 63void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
61 64
62#endif /* __VF_ERROR_H__ */ 65#endif /* __VF_ERROR_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index afcfb8bcfb65..e5fd0ff6b29d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -36,6 +36,18 @@ struct amdgpu_mm_table {
36 uint64_t gpu_addr; 36 uint64_t gpu_addr;
37}; 37};
38 38
39#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
40
41/* struct error_entry - amdgpu VF error information. */
42struct amdgpu_vf_error_buffer {
43 struct mutex lock;
44 int read_count;
45 int write_count;
46 uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
47 uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
48 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
49};
50
39/** 51/**
40 * struct amdgpu_virt_ops - amdgpu device virt operations 52 * struct amdgpu_virt_ops - amdgpu device virt operations
41 */ 53 */
@@ -59,6 +71,7 @@ struct amdgpu_virt {
59 struct work_struct flr_work; 71 struct work_struct flr_work;
60 struct amdgpu_mm_table mm_table; 72 struct amdgpu_mm_table mm_table;
61 const struct amdgpu_virt_ops *ops; 73 const struct amdgpu_virt_ops *ops;
74 struct amdgpu_vf_error_buffer vf_errors;
62}; 75};
63 76
64#define AMDGPU_CSA_SIZE (8 * 1024) 77#define AMDGPU_CSA_SIZE (8 * 1024)