aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFenghua Yu <fenghua.yu@intel.com>2006-12-08 19:14:22 -0500
committerTony Luck <tony.luck@intel.com>2007-01-29 18:26:19 -0500
commitbf6285278418f1dc6f07296bbb286da0bfe26d5d (patch)
treea1cf0a46e07e0641be80e35fa05bdeaa74f9a90b
parente9ef08bdc189e98610bc4b9a6e6f19bc3793b2c8 (diff)
[IA64] Itanium MC Error Injection Tool: Doc and sample application
This patch contains a documention and sample application. Since the sample application has ~1000 lines of code, it might not be suitable in a kernel documention in kenrel tree. If you think this is not good place to hold the sample application, please let me know and I'm open to other choices e.g. sourceforge etc. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--Documentation/ia64/err_inject.txt1068
1 files changed, 1068 insertions, 0 deletions
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
new file mode 100644
index 000000000000..26487c172cfc
--- /dev/null
+++ b/Documentation/ia64/err_inject.txt
@@ -0,0 +1,1068 @@
1
2IPF Machine Check (MC) error inject tool
3========================================
4
5IPF Machine Check (MC) error inject tool is used to inject MC
6errors from Linux. The tool is a test bed for IPF MC work flow including
7hardware correctable error handling, OS recoverable error handling, MC
8event logging, etc.
9
The tool includes two parts: a kernel driver and a sample user
application. The driver provides an interface to PAL to inject errors
and to query error injection capabilities. The driver code is in
arch/ia64/kernel/err_inject.c. The sample application (shown below)
provides a combination of various errors and calls the driver's interface
(sysfs interface) to inject errors or query error injection capabilities.
16
The tool can be used to test Intel IPF machine MC handling capabilities.
It's especially useful for people who do not have access to a hardware MC
injection tool. It's also very useful to integrate with other
software test suites to do stress testing on IPF.
21
Below is a sample application as part of the whole tool. The sample
can be used as a working test tool, or it can be expanded to include
more features. It can also be integrated into a library or another user
application to perform more thorough tests.
26
The sample application takes err.conf as its error configuration input. The
code compiles with gcc. After you install the err_inject driver, you can run
this sample application to inject errors.
30
31Errata: Itanium 2 Processors Specification Update lists some errata against
32the pal_mc_error_inject PAL procedure. The following err.conf has been tested
33on latest Montecito PAL.
34
35err.conf:
36
37#This is configuration file for err_inject_tool.
38#The format of the each line is:
39#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
40#where
41# cpu: logical cpu number the error will be inject in.
42# loop: times the error will be injected.
43# interval: In second. every so often one error is injected.
44# err_type_info, err_struct_info: PAL parameters.
45#
46#Note: All values are hex w/o or w/ 0x prefix.
47
48
49#On cpu2, inject only total 0x10 errors, interval 5 seconds
50#corrected, data cache, hier-2, physical addr(assigned by tool code).
51#working on Montecito latest PAL.
522, 10, 5, 4101, 95
53
54#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
55#corrected, data cache, hier-2, physical addr(assigned by tool code).
56#working on Montecito latest PAL.
574, 10, 5, 4109, 95
58
59#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
60#recoverable, DTR0, hier-2.
61#working on Montecito latest PAL.
620xf, 0x10, 5, 4249, 15
63
64The sample application source code:
65
66err_injection_tool.c:
67
68/*
69 * This program is free software; you can redistribute it and/or modify
70 * it under the terms of the GNU General Public License as published by
71 * the Free Software Foundation; either version 2 of the License, or
72 * (at your option) any later version.
73 *
74 * This program is distributed in the hope that it will be useful, but
75 * WITHOUT ANY WARRANTY; without even the implied warranty of
76 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
77 * NON INFRINGEMENT. See the GNU General Public License for more
78 * details.
79 *
80 * You should have received a copy of the GNU General Public License
81 * along with this program; if not, write to the Free Software
82 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83 *
84 * Copyright (C) 2006 Intel Co
85 * Fenghua Yu <fenghua.yu@intel.com>
86 *
87 */
88#include <sys/types.h>
89#include <sys/stat.h>
90#include <fcntl.h>
91#include <stdio.h>
92#include <sched.h>
93#include <unistd.h>
94#include <stdlib.h>
95#include <stdarg.h>
96#include <string.h>
97#include <errno.h>
98#include <time.h>
99#include <sys/ipc.h>
100#include <sys/sem.h>
101#include <sys/wait.h>
102#include <sys/mman.h>
103#include <sys/shm.h>
104
#define MAX_FN_SIZE 256
#define MAX_BUF_SIZE 256
#define DATA_BUF_SIZE 256
#define NR_CPUS 512
#define MAX_TASK_NUM 2048
#define MIN_INTERVAL 5 // seconds
#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
#define PARA_FIELD_NUM 5
#define MASK_SIZE (NR_CPUS/64)
#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/"

int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);

/* Set to non-zero (by the -v option) to enable vbprintf() output. */
int verbose;

/*
 * printf() that only prints when 'verbose' is set.
 *
 * Wrapped in do { } while (0) so the macro is a single statement: a bare
 * "if (verbose) printf" would silently capture the 'else' of an enclosing
 * if/else written around a vbprintf() call.
 */
#define vbprintf(...) do { if (verbose) printf(__VA_ARGS__); } while (0)
120
121int log_info(int cpu, const char *fmt, ...)
122{
123 FILE *log;
124 char fn[MAX_FN_SIZE];
125 char buf[MAX_BUF_SIZE];
126 va_list args;
127
128 sprintf(fn, "%d.log", cpu);
129 log=fopen(fn, "a+");
130 if (log==NULL) {
131 perror("Error open:");
132 return -1;
133 }
134
135 va_start(args, fmt);
136 vprintf(fmt, args);
137 memset(buf, 0, MAX_BUF_SIZE);
138 vsprintf(buf, fmt, args);
139 va_end(args);
140
141 fwrite(buf, sizeof(buf), 1, log);
142 fclose(log);
143
144 return 0;
145}
146
typedef unsigned long u64;
typedef unsigned int u32;

/*
 * The err_type_info value handed to the driver, viewable either as a raw
 * 64-bit word (err_type_info) or field by field (err_type_info_u).
 */
typedef union err_type_info_u {
	struct {
		u64 mode : 3;		/* bits 0-2 */
		u64 err_inj : 3;	/* bits 3-5 */
		u64 err_sev : 2;	/* bits 6-7 */
		u64 err_struct : 5;	/* bits 8-12 */
		u64 struct_hier : 3;	/* bits 13-15 */
		u64 reserved : 48;	/* bits 16-63 */
	} err_type_info_u;
	u64 err_type_info;
} err_type_info_t;
161
162typedef union err_struct_info_u {
163 struct {
164 u64 siv : 1, /* 0 */
165 c_t : 2, /* 1-2 */
166 cl_p : 3, /* 3-5 */
167 cl_id : 3, /* 6-8 */
168 cl_dp : 1, /* 9 */
169 reserved1 : 22, /* 10-31 */
170 tiv : 1, /* 32 */
171 trigger : 4, /* 33-36 */
172 trigger_pl : 3, /* 37-39 */
173 reserved2 : 24; /* 40-63 */
174 } err_struct_info_cache;
175 struct {
176 u64 siv : 1, /* 0 */
177 tt : 2, /* 1-2 */
178 tc_tr : 2, /* 3-4 */
179 tr_slot : 8, /* 5-12 */
180 reserved1 : 19, /* 13-31 */
181 tiv : 1, /* 32 */
182 trigger : 4, /* 33-36 */
183 trigger_pl : 3, /* 37-39 */
184 reserved2 : 24; /* 40-63 */
185 } err_struct_info_tlb;
186 struct {
187 u64 siv : 1, /* 0 */
188 regfile_id : 4, /* 1-4 */
189 reg_num : 7, /* 5-11 */
190 reserved1 : 20, /* 12-31 */
191 tiv : 1, /* 32 */
192 trigger : 4, /* 33-36 */
193 trigger_pl : 3, /* 37-39 */
194 reserved2 : 24; /* 40-63 */
195 } err_struct_info_register;
196 struct {
197 u64 reserved;
198 } err_struct_info_bus_processor_interconnect;
199 u64 err_struct_info;
200} err_struct_info_t;
201
202typedef union err_data_buffer_u {
203 struct {
204 u64 trigger_addr; /* 0-63 */
205 u64 inj_addr; /* 64-127 */
206 u64 way : 5, /* 128-132 */
207 index : 20, /* 133-152 */
208 : 39; /* 153-191 */
209 } err_data_buffer_cache;
210 struct {
211 u64 trigger_addr; /* 0-63 */
212 u64 inj_addr; /* 64-127 */
213 u64 way : 5, /* 128-132 */
214 index : 20, /* 133-152 */
215 reserved : 39; /* 153-191 */
216 } err_data_buffer_tlb;
217 struct {
218 u64 trigger_addr; /* 0-63 */
219 } err_data_buffer_register;
220 struct {
221 u64 reserved; /* 0-63 */
222 } err_data_buffer_bus_processor_interconnect;
223 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
224} err_data_buffer_t;
225
226typedef union capabilities_u {
227 struct {
228 u64 i : 1,
229 d : 1,
230 rv : 1,
231 tag : 1,
232 data : 1,
233 mesi : 1,
234 dp : 1,
235 reserved1 : 3,
236 pa : 1,
237 va : 1,
238 wi : 1,
239 reserved2 : 20,
240 trigger : 1,
241 trigger_pl : 1,
242 reserved3 : 30;
243 } capabilities_cache;
244 struct {
245 u64 d : 1,
246 i : 1,
247 rv : 1,
248 tc : 1,
249 tr : 1,
250 reserved1 : 27,
251 trigger : 1,
252 trigger_pl : 1,
253 reserved2 : 30;
254 } capabilities_tlb;
255 struct {
256 u64 gr_b0 : 1,
257 gr_b1 : 1,
258 fr : 1,
259 br : 1,
260 pr : 1,
261 ar : 1,
262 cr : 1,
263 rr : 1,
264 pkr : 1,
265 dbr : 1,
266 ibr : 1,
267 pmc : 1,
268 pmd : 1,
269 reserved1 : 3,
270 regnum : 1,
271 reserved2 : 15,
272 trigger : 1,
273 trigger_pl : 1,
274 reserved3 : 30;
275 } capabilities_register;
276 struct {
277 u64 reserved;
278 } capabilities_bus_processor_interconnect;
279} capabilities_t;
280
281typedef struct resources_s {
282 u64 ibr0 : 1,
283 ibr2 : 1,
284 ibr4 : 1,
285 ibr6 : 1,
286 dbr0 : 1,
287 dbr2 : 1,
288 dbr4 : 1,
289 dbr6 : 1,
290 reserved : 48;
291} resources_t;
292
293
/* Return the system page size as reported by sysconf(), or -1 on error. */
long get_page_size(void)
{
	return sysconf(_SC_PAGESIZE);
}

/* Page size, falling back to 0x4000 when sysconf() cannot report it. */
#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
/* Size of the shared memory segment: two pages for each possible cpu. */
#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
/* Address at which the shared memory segment is attached. */
#define SHM_VA 0x2000000100000000
303
304int shmid;
305void *shmaddr;
306
307int create_shm(void)
308{
309 key_t key;
310 char fn[MAX_FN_SIZE];
311
312 /* cpu0 is always existing */
313 sprintf(fn, PATH_FORMAT, 0);
314 if ((key = ftok(fn, 's')) == -1) {
315 perror("ftok");
316 return -1;
317 }
318
319 shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
320 if (shmid == -1) {
321 if (errno==EEXIST) {
322 shmid = shmget(key, SHM_SIZE, 0);
323 if (shmid == -1) {
324 perror("shmget");
325 return -1;
326 }
327 }
328 else {
329 perror("shmget");
330 return -1;
331 }
332 }
333 vbprintf("shmid=%d", shmid);
334
335 /* connect to the segment: */
336 shmaddr = shmat(shmid, (void *)SHM_VA, 0);
337 if (shmaddr == (void*)-1) {
338 perror("shmat");
339 return -1;
340 }
341
342 memset(shmaddr, 0, SHM_SIZE);
343 mlock(shmaddr, SHM_SIZE);
344
345 return 0;
346}
347
348int free_shm()
349{
350 munlock(shmaddr, SHM_SIZE);
351 shmdt(shmaddr);
352 semctl(shmid, 0, IPC_RMID);
353
354 return 0;
355}
356
357#ifdef _SEM_SEMUN_UNDEFINED
358union semun
359{
360 int val;
361 struct semid_ds *buf;
362 unsigned short int *array;
363 struct seminfo *__buf;
364};
365#endif
366
367u32 mode=1; /* 1: physical mode; 2: virtual mode. */
368int one_lock=1;
369key_t key[NR_CPUS];
370int semid[NR_CPUS];
371
372int create_sem(int cpu)
373{
374 union semun arg;
375 char fn[MAX_FN_SIZE];
376 int sid;
377
378 sprintf(fn, PATH_FORMAT, cpu);
379 sprintf(fn, "%s/%s", fn, "err_type_info");
380 if ((key[cpu] = ftok(fn, 'e')) == -1) {
381 perror("ftok");
382 return -1;
383 }
384
385 if (semid[cpu]!=0)
386 return 0;
387
388 /* clear old semaphore */
389 if ((sid = semget(key[cpu], 1, 0)) != -1)
390 semctl(sid, 0, IPC_RMID);
391
392 /* get one semaphore */
393 if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
394 perror("semget");
395 printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
396 (u64)key[cpu]);
397 return -1;
398 }
399
400 vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
401 (u64)key[cpu]);
402 /* initialize the semaphore to 1: */
403 arg.val = 1;
404 if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
405 perror("semctl");
406 return -1;
407 }
408
409 return 0;
410}
411
412static int lock(int cpu)
413{
414 struct sembuf lock;
415
416 lock.sem_num = cpu;
417 lock.sem_op = 1;
418 semop(semid[cpu], &lock, 1);
419
420 return 0;
421}
422
423static int unlock(int cpu)
424{
425 struct sembuf unlock;
426
427 unlock.sem_num = cpu;
428 unlock.sem_op = -1;
429 semop(semid[cpu], &unlock, 1);
430
431 return 0;
432}
433
434void free_sem(int cpu)
435{
436 semctl(semid[cpu], 0, IPC_RMID);
437}
438
439int wr_multi(char *fn, unsigned long *data, int size)
440{
441 int fd;
442 char buf[MAX_BUF_SIZE];
443 int ret;
444
445 if (size==1)
446 sprintf(buf, "%lx", *data);
447 else if (size==3)
448 sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
449 else {
450 fprintf(stderr,"write to file with wrong size!\n");
451 return -1;
452 }
453
454 fd=open(fn, O_RDWR);
455 if (!fd) {
456 perror("Error:");
457 return -1;
458 }
459 ret=write(fd, buf, sizeof(buf));
460 close(fd);
461 return ret;
462}
463
/* Write the single value 'data' to file 'fn'; see wr_multi() for the result. */
int wr(char *fn, unsigned long data)
{
	unsigned long value = data;

	return wr_multi(fn, &value, 1);
}
468
469int rd(char *fn, unsigned long *data)
470{
471 int fd;
472 char buf[MAX_BUF_SIZE];
473
474 fd=open(fn, O_RDONLY);
475 if (fd<0) {
476 perror("Error:");
477 return -1;
478 }
479 read(fd, buf, MAX_BUF_SIZE);
480 *data=strtoul(buf, NULL, 16);
481 close(fd);
482 return 0;
483}
484
485int rd_status(char *path, int *status)
486{
487 char fn[MAX_FN_SIZE];
488 sprintf(fn, "%s/status", path);
489 if (rd(fn, (u64*)status)<0) {
490 perror("status reading error.\n");
491 return -1;
492 }
493
494 return 0;
495}
496
497int rd_capabilities(char *path, u64 *capabilities)
498{
499 char fn[MAX_FN_SIZE];
500 sprintf(fn, "%s/capabilities", path);
501 if (rd(fn, capabilities)<0) {
502 perror("capabilities reading error.\n");
503 return -1;
504 }
505
506 return 0;
507}
508
509int rd_all(char *path)
510{
511 unsigned long err_type_info, err_struct_info, err_data_buffer;
512 int status;
513 unsigned long capabilities, resources;
514 char fn[MAX_FN_SIZE];
515
516 sprintf(fn, "%s/err_type_info", path);
517 if (rd(fn, &err_type_info)<0) {
518 perror("err_type_info reading error.\n");
519 return -1;
520 }
521 printf("err_type_info=%lx\n", err_type_info);
522
523 sprintf(fn, "%s/err_struct_info", path);
524 if (rd(fn, &err_struct_info)<0) {
525 perror("err_struct_info reading error.\n");
526 return -1;
527 }
528 printf("err_struct_info=%lx\n", err_struct_info);
529
530 sprintf(fn, "%s/err_data_buffer", path);
531 if (rd(fn, &err_data_buffer)<0) {
532 perror("err_data_buffer reading error.\n");
533 return -1;
534 }
535 printf("err_data_buffer=%lx\n", err_data_buffer);
536
537 sprintf(fn, "%s/status", path);
538 if (rd("status", (u64*)&status)<0) {
539 perror("status reading error.\n");
540 return -1;
541 }
542 printf("status=%d\n", status);
543
544 sprintf(fn, "%s/capabilities", path);
545 if (rd(fn,&capabilities)<0) {
546 perror("capabilities reading error.\n");
547 return -1;
548 }
549 printf("capabilities=%lx\n", capabilities);
550
551 sprintf(fn, "%s/resources", path);
552 if (rd(fn, &resources)<0) {
553 perror("resources reading error.\n");
554 return -1;
555 }
556 printf("resources=%lx\n", resources);
557
558 return 0;
559}
560
561int query_capabilities(char *path, err_type_info_t err_type_info,
562 u64 *capabilities)
563{
564 char fn[MAX_FN_SIZE];
565 err_struct_info_t err_struct_info;
566 err_data_buffer_t err_data_buffer;
567
568 err_struct_info.err_struct_info=0;
569 memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
570
571 sprintf(fn, "%s/err_type_info", path);
572 wr(fn, err_type_info.err_type_info);
573 sprintf(fn, "%s/err_struct_info", path);
574 wr(fn, 0x0);
575 sprintf(fn, "%s/err_data_buffer", path);
576 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
577
578 // Fire pal_mc_error_inject procedure.
579 sprintf(fn, "%s/call_start", path);
580 wr(fn, mode);
581
582 if (rd_capabilities(path, capabilities)<0)
583 return -1;
584
585 return 0;
586}
587
588int query_all_capabilities()
589{
590 int status;
591 err_type_info_t err_type_info;
592 int err_sev, err_struct, struct_hier;
593 int cap=0;
594 u64 capabilities;
595 char path[MAX_FN_SIZE];
596
597 err_type_info.err_type_info=0; // Initial
598 err_type_info.err_type_info_u.mode=0; // Query mode;
599 err_type_info.err_type_info_u.err_inj=0;
600
601 printf("All capabilities implemented in pal_mc_error_inject:\n");
602 sprintf(path, PATH_FORMAT ,0);
603 for (err_sev=0;err_sev<3;err_sev++)
604 for (err_struct=0;err_struct<5;err_struct++)
605 for (struct_hier=0;struct_hier<5;struct_hier++)
606 {
607 status=-1;
608 capabilities=0;
609 err_type_info.err_type_info_u.err_sev=err_sev;
610 err_type_info.err_type_info_u.err_struct=err_struct;
611 err_type_info.err_type_info_u.struct_hier=struct_hier;
612
613 if (query_capabilities(path, err_type_info, &capabilities)<0)
614 continue;
615
616 if (rd_status(path, &status)<0)
617 continue;
618
619 if (status==0) {
620 cap=1;
621 printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
622 err_sev, err_struct, struct_hier);
623 printf("capabilities 0x%lx\n", capabilities);
624 }
625 }
626 if (!cap) {
627 printf("No capabilities supported.\n");
628 return 0;
629 }
630
631 return 0;
632}
633
634int err_inject(int cpu, char *path, err_type_info_t err_type_info,
635 err_struct_info_t err_struct_info,
636 err_data_buffer_t err_data_buffer)
637{
638 int status;
639 char fn[MAX_FN_SIZE];
640
641 log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
642 err_type_info.err_type_info,
643 err_struct_info.err_struct_info);
644 log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
645 err_data_buffer.err_data_buffer[0],
646 err_data_buffer.err_data_buffer[1],
647 err_data_buffer.err_data_buffer[2]);
648 sprintf(fn, "%s/err_type_info", path);
649 wr(fn, err_type_info.err_type_info);
650 sprintf(fn, "%s/err_struct_info", path);
651 wr(fn, err_struct_info.err_struct_info);
652 sprintf(fn, "%s/err_data_buffer", path);
653 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
654
655 // Fire pal_mc_error_inject procedure.
656 sprintf(fn, "%s/call_start", path);
657 wr(fn,mode);
658
659 if (rd_status(path, &status)<0) {
660 vbprintf("fail: read status\n");
661 return -100;
662 }
663
664 if (status!=0) {
665 log_info(cpu, "fail: status=%d\n", status);
666 return status;
667 }
668
669 return status;
670}
671
672static int construct_data_buf(char *path, err_type_info_t err_type_info,
673 err_struct_info_t err_struct_info,
674 err_data_buffer_t *err_data_buffer,
675 void *va1)
676{
677 char fn[MAX_FN_SIZE];
678 u64 virt_addr=0, phys_addr=0;
679
680 vbprintf("va1=%lx\n", (u64)va1);
681 memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
682
683 switch (err_type_info.err_type_info_u.err_struct) {
684 case 1: // Cache
685 switch (err_struct_info.err_struct_info_cache.cl_id) {
686 case 1: //Virtual addr
687 err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
688 break;
689 case 2: //Phys addr
690 sprintf(fn, "%s/virtual_to_phys", path);
691 virt_addr=(u64)va1;
692 if (wr(fn,virt_addr)<0)
693 return -1;
694 rd(fn, &phys_addr);
695 err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
696 break;
697 default:
698 printf("Not supported cl_id\n");
699 break;
700 }
701 break;
702 case 2: // TLB
703 break;
704 case 3: // Register file
705 break;
706 case 4: // Bus/system interconnect
707 default:
708 printf("Not supported err_struct\n");
709 break;
710 }
711
712 return 0;
713}
714
/*
 * One injection task, as given by a line of err.conf or by the -e option.
 */
typedef struct {
	u64 cpu;		/* logical cpu the error is injected on */
	u64 loop;		/* number of injections to perform */
	u64 interval;		/* seconds between two injections */
	u64 err_type_info;	/* raw err_type_info parameter */
	u64 err_struct_info;	/* raw err_struct_info parameter */
	/* raw err_data_buffer parameter; all -1 when not specified */
	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
} parameters_t;

/* Task parsed from the -e option; only meaningful when 'para' is set. */
parameters_t line_para;
/* Non-zero when the task comes from the command line instead of err.conf. */
int para;
726
727static int empty_data_buffer(u64 *err_data_buffer)
728{
729 int empty=1;
730 int i;
731
732 for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
733 if (err_data_buffer[i]!=-1)
734 empty=0;
735
736 return empty;
737}
738
739int err_inj()
740{
741 err_type_info_t err_type_info;
742 err_struct_info_t err_struct_info;
743 err_data_buffer_t err_data_buffer;
744 int count;
745 FILE *fp;
746 unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
747 u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
748 int num;
749 int i;
750 char path[MAX_FN_SIZE];
751 parameters_t parameters[MAX_TASK_NUM]={};
752 pid_t child_pid[MAX_TASK_NUM];
753 time_t current_time;
754 int status;
755
756 if (!para) {
757 fp=fopen("err.conf", "r");
758 if (fp==NULL) {
759 perror("Error open err.conf");
760 return -1;
761 }
762
763 num=0;
764 while (!feof(fp)) {
765 char buf[256];
766 memset(buf,0,256);
767 fgets(buf, 256, fp);
768 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
769 &cpu, &loop, &interval,&err_type_info_conf,
770 &err_struct_info_conf,
771 &err_data_buffer_conf[0],
772 &err_data_buffer_conf[1],
773 &err_data_buffer_conf[2]);
774 if (count!=PARA_FIELD_NUM+3) {
775 err_data_buffer_conf[0]=-1;
776 err_data_buffer_conf[1]=-1;
777 err_data_buffer_conf[2]=-1;
778 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
779 &cpu, &loop, &interval,&err_type_info_conf,
780 &err_struct_info_conf);
781 if (count!=PARA_FIELD_NUM)
782 continue;
783 }
784
785 parameters[num].cpu=cpu;
786 parameters[num].loop=loop;
787 parameters[num].interval= interval>MIN_INTERVAL
788 ?interval:MIN_INTERVAL;
789 parameters[num].err_type_info=err_type_info_conf;
790 parameters[num].err_struct_info=err_struct_info_conf;
791 memcpy(parameters[num++].err_data_buffer,
792 err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
793
794 if (num>=MAX_TASK_NUM)
795 break;
796 }
797 }
798 else {
799 parameters[0].cpu=line_para.cpu;
800 parameters[0].loop=line_para.loop;
801 parameters[0].interval= line_para.interval>MIN_INTERVAL
802 ?line_para.interval:MIN_INTERVAL;
803 parameters[0].err_type_info=line_para.err_type_info;
804 parameters[0].err_struct_info=line_para.err_struct_info;
805 memcpy(parameters[0].err_data_buffer,
806 line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
807
808 num=1;
809 }
810
811 /* Create semaphore: If one_lock, one semaphore for all processors.
812 Otherwise, one sempaphore for each processor. */
813 if (one_lock) {
814 if (create_sem(0)) {
815 printf("Can not create semaphore...exit\n");
816 free_sem(0);
817 return -1;
818 }
819 }
820 else {
821 for (i=0;i<num;i++) {
822 if (create_sem(parameters[i].cpu)) {
823 printf("Can not create semaphore for cpu%d...exit\n",i);
824 free_sem(parameters[num].cpu);
825 return -1;
826 }
827 }
828 }
829
830 /* Create a shm segment which will be used to inject/consume errors on.*/
831 if (create_shm()==-1) {
832 printf("Error to create shm...exit\n");
833 return -1;
834 }
835
836 for (i=0;i<num;i++) {
837 pid_t pid;
838
839 current_time=time(NULL);
840 log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
841 log_info(parameters[i].cpu, "Configurations:\n");
842 log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
843 parameters[i].cpu,
844 parameters[i].loop,
845 parameters[i].interval);
846 log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
847 parameters[i].err_type_info,
848 parameters[i].err_struct_info);
849
850 sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
851 err_type_info.err_type_info=parameters[i].err_type_info;
852 err_struct_info.err_struct_info=parameters[i].err_struct_info;
853 memcpy(err_data_buffer.err_data_buffer,
854 parameters[i].err_data_buffer,
855 ERR_DATA_BUFFER_SIZE*8);
856
857 pid=fork();
858 if (pid==0) {
859 unsigned long mask[MASK_SIZE];
860 int j, k;
861
862 void *va1, *va2;
863
864 /* Allocate two memory areas va1 and va2 in shm */
865 va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
866 va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
867
868 vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
869 memset(va1, 0x1, PAGE_SIZE);
870 memset(va2, 0x2, PAGE_SIZE);
871
872 if (empty_data_buffer(err_data_buffer.err_data_buffer))
873 /* If not specified yet, construct data buffer
874 * with va1
875 */
876 construct_data_buf(path, err_type_info,
877 err_struct_info, &err_data_buffer,va1);
878
879 for (j=0;j<MASK_SIZE;j++)
880 mask[j]=0;
881
882 cpu=parameters[i].cpu;
883 k = cpu%64;
884 j = cpu/64;
885 mask[j]=1<<k;
886
887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888 perror("Error sched_setaffinity:");
889 return -1;
890 }
891
892 for (j=0; j<parameters[i].loop; j++) {
893 log_info(parameters[i].cpu,"Injection ");
894 log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
895
896 parameters[i].cpu,j+1, parameters[i].loop);
897
898 /* Hold the lock */
899 if (one_lock)
900 lock(0);
901 else
902 /* Hold lock on this cpu */
903 lock(parameters[i].cpu);
904
905 if ((status=err_inject(parameters[i].cpu,
906 path, err_type_info,
907 err_struct_info, err_data_buffer))
908 ==0) {
909 /* consume the error for "inject only"*/
910 memcpy(va2, va1, PAGE_SIZE);
911 memcpy(va1, va2, PAGE_SIZE);
912 log_info(parameters[i].cpu,
913 "successful\n");
914 }
915 else {
916 log_info(parameters[i].cpu,"fail:");
917 log_info(parameters[i].cpu,
918 "status=%d\n", status);
919 unlock(parameters[i].cpu);
920 break;
921 }
922 if (one_lock)
923 /* Release the lock */
924 unlock(0);
925 /* Release lock on this cpu */
926 else
927 unlock(parameters[i].cpu);
928
929 if (j < parameters[i].loop-1)
930 sleep(parameters[i].interval);
931 }
932 current_time=time(NULL);
933 log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
934 return 0;
935 }
936 else if (pid<0) {
937 perror("Error fork:");
938 continue;
939 }
940 child_pid[i]=pid;
941 }
942 for (i=0;i<num;i++)
943 waitpid(child_pid[i], NULL, 0);
944
945 if (one_lock)
946 free_sem(0);
947 else
948 for (i=0;i<num;i++)
949 free_sem(parameters[i].cpu);
950
951 printf("All done.\n");
952
953 return 0;
954}
955
/* Print the command-line usage text to stdout. */
void help()
{
	static const char *const usage[] = {
		"err_inject_tool:\n",
		"\t-q: query all capabilities. default: off\n",
		"\t-m: procedure mode. 1: physical 2: virtual. default: 1\n",
		"\t-i: inject errors. default: off\n",
		"\t-l: one lock per cpu. default: one lock for all\n",
		"\t-e: error parameters:\n",
		"\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n",
		"\t\t cpu: logical cpu number the error will be inject in.\n",
		"\t\t loop: times the error will be injected.\n",
		"\t\t interval: In second. every so often one error is injected.\n",
		"\t\t err_type_info, err_struct_info: PAL parameters.\n",
		"\t\t err_data_buffer: PAL parameter. Optional. If not present,\n",
		"\t\t it's constructed by tool automatically. Be\n",
		"\t\t careful to provide err_data_buffer and make\n",
		"\t\t sure it's working with the environment.\n",
		"\t Note:no space between error parameters.\n",
		"\t default: Take error parameters from err.conf instead of command line.\n",
		"\t-v: verbose. default: off\n",
		"\t-h: help\n\n",
		"The tool will take err.conf file as ",
		"input to inject single or multiple errors ",
		"on one or multiple cpus in parallel.\n",
	};
	size_t line;

	for (line = 0; line < sizeof(usage) / sizeof(usage[0]); line++)
		fputs(usage[line], stdout);
}
981
982int main(int argc, char **argv)
983{
984 char c;
985 int do_err_inj=0;
986 int do_query_all=0;
987 int count;
988 u32 m;
989
990 /* Default one lock for all cpu's */
991 one_lock=1;
992 while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
993 switch (c) {
994 case 'm': /* Procedure mode. 1: phys 2: virt */
995 count=sscanf(optarg, "%x", &m);
996 if (count!=1 || (m!=1 && m!=2)) {
997 printf("Wrong mode number.\n");
998 help();
999 return -1;
1000 }
1001 mode=m;
1002 break;
1003 case 'i': /* Inject errors */
1004 do_err_inj=1;
1005 break;
1006 case 'q': /* Query */
1007 do_query_all=1;
1008 break;
1009 case 'v': /* Verbose */
1010 verbose=1;
1011 break;
1012 case 'l': /* One lock per cpu */
1013 one_lock=0;
1014 break;
1015 case 'e': /* error arguments */
1016 /* Take parameters:
1017 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018 * err_data_buffer is optional. Recommend not to specify
1019 * err_data_buffer. Better to use tool to generate it.
1020 */
1021 count=sscanf(optarg,
1022 "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023 &line_para.cpu,
1024 &line_para.loop,
1025 &line_para.interval,
1026 &line_para.err_type_info,
1027 &line_para.err_struct_info,
1028 &line_para.err_data_buffer[0],
1029 &line_para.err_data_buffer[1],
1030 &line_para.err_data_buffer[2]);
1031 if (count!=PARA_FIELD_NUM+3) {
1032 line_para.err_data_buffer[0]=-1,
1033 line_para.err_data_buffer[1]=-1,
1034 line_para.err_data_buffer[2]=-1;
1035 count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036 &line_para.cpu,
1037 &line_para.loop,
1038 &line_para.interval,
1039 &line_para.err_type_info,
1040 &line_para.err_struct_info);
1041 if (count!=PARA_FIELD_NUM) {
1042 printf("Wrong error arguments.\n");
1043 help();
1044 return -1;
1045 }
1046 }
1047 para=1;
1048 break;
1049 continue;
1050 break;
1051 case 'h':
1052 help();
1053 return 0;
1054 default:
1055 break;
1056 }
1057
1058 if (do_query_all)
1059 query_all_capabilities();
1060 if (do_err_inj)
1061 err_inj();
1062
1063 if (!do_query_all && !do_err_inj)
1064 help();
1065
1066 return 0;
1067}
1068