diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2019-07-22 08:27:25 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-07-31 15:50:23 -0400 |
commit | cf04dfd0e91f11ac83d369f809ada8147aeaf3fe (patch) | |
tree | 5571c317a14f29368f3c6d0ede45708449884e7a | |
parent | 8c94810357fad9d583e37785534a8caec558bb24 (diff) |
drm/amdgpu: allow ras interrupt callback to return error data
Add error data as a parameter to the RAS interrupt callback and process it in the interrupt handler.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 37 |
2 files changed, 22 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 143153a3c5b0..b248c8250d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |||
@@ -1005,7 +1005,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) | |||
1005 | struct ras_ih_data *data = &obj->ih_data; | 1005 | struct ras_ih_data *data = &obj->ih_data; |
1006 | struct amdgpu_iv_entry entry; | 1006 | struct amdgpu_iv_entry entry; |
1007 | int ret; | 1007 | int ret; |
1008 | struct ras_err_data err_data = {0, 0}; | 1008 | struct ras_err_data err_data = {0, 0, 0, NULL}; |
1009 | 1009 | ||
1010 | while (data->rptr != data->wptr) { | 1010 | while (data->rptr != data->wptr) { |
1011 | rmb(); | 1011 | rmb(); |
@@ -1020,14 +1020,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) | |||
1020 | * from the callback to update the error type/count, etc | 1020 | * from the callback to update the error type/count, etc |
1021 | */ | 1021 | */ |
1022 | if (data->cb) { | 1022 | if (data->cb) { |
1023 | ret = data->cb(obj->adev, &entry); | 1023 | ret = data->cb(obj->adev, &err_data, &entry); |
1024 | /* ue will trigger an interrupt, and in that case | 1024 | /* ue will trigger an interrupt, and in that case |
1025 | * we need to do a reset to recover the whole system. | 1025 | * we need to do a reset to recover the whole system. |
1026 | * But leave the IP to do that recovery; here we just dispatch | 1026 | * But leave the IP to do that recovery; here we just dispatch |
1027 | * the error. | 1027 | * the error. |
1028 | */ | 1028 | */ |
1029 | if (ret == AMDGPU_RAS_UE) { | 1029 | if (ret == AMDGPU_RAS_UE) { |
1030 | obj->err_data.ue_count++; | 1030 | obj->err_data.ue_count += err_data.ue_count; |
1031 | } | 1031 | } |
1032 | /* Might need get ce count by register, but not all IP | 1032 | /* Might need get ce count by register, but not all IP |
1033 | * saves ce count, some IP just use one bit or two bits | 1033 | * saves ce count, some IP just use one bit or two bits |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0920db7aff34..2c86a5135ec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |||
@@ -76,9 +76,6 @@ struct ras_common_if { | |||
76 | char name[32]; | 76 | char name[32]; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | typedef int (*ras_ih_cb)(struct amdgpu_device *adev, | ||
80 | struct amdgpu_iv_entry *entry); | ||
81 | |||
82 | struct amdgpu_ras { | 79 | struct amdgpu_ras { |
83 | /* ras infrastructure */ | 80 | /* ras infrastructure */ |
84 | /* for ras itself. */ | 81 | /* for ras itself. */ |
@@ -108,21 +105,6 @@ struct amdgpu_ras { | |||
108 | uint32_t flags; | 105 | uint32_t flags; |
109 | }; | 106 | }; |
110 | 107 | ||
111 | struct ras_ih_data { | ||
112 | /* interrupt bottom half */ | ||
113 | struct work_struct ih_work; | ||
114 | int inuse; | ||
115 | /* IP callback */ | ||
116 | ras_ih_cb cb; | ||
117 | /* full of entries */ | ||
118 | unsigned char *ring; | ||
119 | unsigned int ring_size; | ||
120 | unsigned int element_size; | ||
121 | unsigned int aligned_element_size; | ||
122 | unsigned int rptr; | ||
123 | unsigned int wptr; | ||
124 | }; | ||
125 | |||
126 | struct ras_fs_data { | 108 | struct ras_fs_data { |
127 | char sysfs_name[32]; | 109 | char sysfs_name[32]; |
128 | char debugfs_name[32]; | 110 | char debugfs_name[32]; |
@@ -149,6 +131,25 @@ struct ras_err_handler_data { | |||
149 | int last_reserved; | 131 | int last_reserved; |
150 | }; | 132 | }; |
151 | 133 | ||
134 | typedef int (*ras_ih_cb)(struct amdgpu_device *adev, | ||
135 | struct ras_err_data *err_data, | ||
136 | struct amdgpu_iv_entry *entry); | ||
137 | |||
138 | struct ras_ih_data { | ||
139 | /* interrupt bottom half */ | ||
140 | struct work_struct ih_work; | ||
141 | int inuse; | ||
142 | /* IP callback */ | ||
143 | ras_ih_cb cb; | ||
144 | /* full of entries */ | ||
145 | unsigned char *ring; | ||
146 | unsigned int ring_size; | ||
147 | unsigned int element_size; | ||
148 | unsigned int aligned_element_size; | ||
149 | unsigned int rptr; | ||
150 | unsigned int wptr; | ||
151 | }; | ||
152 | |||
152 | struct ras_manager { | 153 | struct ras_manager { |
153 | struct ras_common_if head; | 154 | struct ras_common_if head; |
154 | /* reference count */ | 155 | /* reference count */ |