aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c115
1 files changed, 102 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ca9c7d1ede2f..1df6b03a3680 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -200,14 +200,87 @@ static const struct file_operations amdgpu_ras_debugfs_ops = {
200 .llseek = default_llseek 200 .llseek = default_llseek
201}; 201};
202 202
203static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
204{
205 int i;
206
207 for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
208 *block_id = i;
209 if (strcmp(name, ras_block_str(i)) == 0)
210 return 0;
211 }
212 return -EINVAL;
213}
214
215static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
216 const char __user *buf, size_t size,
217 loff_t *pos, struct ras_debug_if *data)
218{
219 ssize_t s = min_t(u64, 64, size);
220 char str[65];
221 char block_name[33];
222 char err[9] = "ue";
223 int op = -1;
224 int block_id;
225 u64 address, value;
226
227 if (*pos)
228 return -EINVAL;
229 *pos = size;
230
231 memset(str, 0, sizeof(str));
232 memset(data, 0, sizeof(*data));
233
234 if (copy_from_user(str, buf, s))
235 return -EINVAL;
236
237 if (sscanf(str, "disable %32s", block_name) == 1)
238 op = 0;
239 else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
240 op = 1;
241 else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
242 op = 2;
243 else if (sscanf(str, "%32s", block_name) == 1)
244 /* ascii string, but commands are not matched. */
245 return -EINVAL;
246
247 if (op != -1) {
248 if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
249 return -EINVAL;
250
251 data->head.block = block_id;
252 data->head.type = memcmp("ue", err, 2) == 0 ?
253 AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE :
254 AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
255 data->op = op;
256
257 if (op == 2) {
258 if (sscanf(str, "%*s %*s %*s %llu %llu",
259 &address, &value) != 2)
260 if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx",
261 &address, &value) != 2)
262 return -EINVAL;
263 data->inject.address = address;
264 data->inject.value = value;
265 }
266 } else {
267 if (size < sizeof(data))
268 return -EINVAL;
269
270 if (copy_from_user(data, buf, sizeof(*data)))
271 return -EINVAL;
272 }
273
274 return 0;
275}
203/* 276/*
204 * DOC: ras debugfs control interface 277 * DOC: ras debugfs control interface
205 * 278 *
206 * It accepts struct ras_debug_if who has two members. 279 * It accepts struct ras_debug_if who has two members.
207 * 280 *
208 * First member: ras_debug_if::head or ras_debug_if::inject. 281 * First member: ras_debug_if::head or ras_debug_if::inject.
209 * It is used to indicate which IP block will be under control. 282 *
210 * Its contents are not human readable, IOW, write it by your programs. 283 * head is used to indicate which IP block will be under control.
211 * 284 *
212 * head has four members, they are block, type, sub_block_index, name. 285 * head has four members, they are block, type, sub_block_index, name.
213 * block: which IP will be under control. 286 * block: which IP will be under control.
@@ -225,6 +298,28 @@ static const struct file_operations amdgpu_ras_debugfs_ops = {
225 * 1: enable RAS on the block. Take ::head as its data. 298 * 1: enable RAS on the block. Take ::head as its data.
226 * 2: inject errors on the block. Take ::inject as its data. 299 * 2: inject errors on the block. Take ::inject as its data.
227 * 300 *
301 * How to use the interface?
302 * programs:
303 * copy struct ras_debug_if into your code and initialize it.
304 * write the struct to the control node.
305 *
306 * bash:
307 * echo op block [error [address value]] > .../ras/ras_ctrl
308 * op: disable, enable, inject
309 * disable: only block is needed
310 * enable: block and error are needed
311 * inject: block, error, address, value are needed
312 * block: umc, sdma, gfx, .........
313 * see ras_block_string[] for details
314 * error: ue, ce
315 * ue: multi_uncorrectable
316 * ce: single_correctable
317 *
318 * here are some examples for bash commands,
319 * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
320 * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
321 * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
322 *
228 * How to check the result? 323 * How to check the result?
229 * 324 *
230 * For disable/enable, please check ras features at 325 * For disable/enable, please check ras features at
@@ -243,19 +338,10 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
243 struct ras_debug_if data; 338 struct ras_debug_if data;
244 int ret = 0; 339 int ret = 0;
245 340
246 if (size < sizeof(data)) 341 ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
247 return -EINVAL; 342 if (ret)
248
249 memset(&data, 0, sizeof(data));
250
251 if (*pos)
252 return -EINVAL;
253
254 if (copy_from_user(&data, buf, sizeof(data)))
255 return -EINVAL; 343 return -EINVAL;
256 344
257 *pos = size;
258
259 if (!amdgpu_ras_is_supported(adev, data.head.block)) 345 if (!amdgpu_ras_is_supported(adev, data.head.block))
260 return -EINVAL; 346 return -EINVAL;
261 347
@@ -269,6 +355,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
269 case 2: 355 case 2:
270 ret = amdgpu_ras_error_inject(adev, &data.inject); 356 ret = amdgpu_ras_error_inject(adev, &data.inject);
271 break; 357 break;
358 default:
359 ret = -EINVAL;
360 break;
272 }; 361 };
273 362
274 if (ret) 363 if (ret)