From c6eae929fd74f11ab13d469a38bffd4e8ba50fb5 Mon Sep 17 00:00:00 2001 From: Nicolas Benech Date: Tue, 25 Sep 2018 14:37:16 -0400 Subject: gpu: nvgpu: posix: Multithreading for unit tests Add a -j argument to enable running unit tests on several threads. Also adds signal handling to prevent a fatal error in one thread from killing the whole unit test framework. JIRA NVGPU-1043 Change-Id: I891a547640cd005a50ffa5c06367ed46c54de012 Signed-off-by: Nicolas Benech Reviewed-on: https://git-master.nvidia.com/r/1847740 Reviewed-by: svc-misra-checker GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: mobile promotions Tested-by: mobile promotions --- userspace/include/unit/args.h | 1 + userspace/include/unit/unit.h | 4 ++ userspace/src/args.c | 13 +++- userspace/src/exec.c | 142 +++++++++++++++++++++++++++++++++++++----- userspace/src/results.c | 26 ++++++-- userspace/src/unit_main.c | 8 +++ 6 files changed, 173 insertions(+), 21 deletions(-) (limited to 'userspace') diff --git a/userspace/include/unit/args.h b/userspace/include/unit/args.h index def84a29..708a1655 100644 --- a/userspace/include/unit/args.h +++ b/userspace/include/unit/args.h @@ -42,6 +42,7 @@ struct unit_fw_args { bool help; int verbose_lvl; bool no_color; + int thread_count; const char *unit_name; const char *unit_load_path; diff --git a/userspace/include/unit/unit.h b/userspace/include/unit/unit.h index 2f93bab5..442a73f1 100644 --- a/userspace/include/unit/unit.h +++ b/userspace/include/unit/unit.h @@ -23,6 +23,8 @@ #ifndef __UNIT_UNIT_H__ #define __UNIT_UNIT_H__ +#include + struct gk20a; struct unit_module; @@ -84,6 +86,8 @@ struct unit_module { */ void *lib_handle; struct unit_fw *fw; + + pthread_t thread; }; /* diff --git a/userspace/src/args.c b/userspace/src/args.c index d91c6f6e..cf17c983 100644 --- a/userspace/src/args.c +++ b/userspace/src/args.c @@ -36,11 +36,12 @@ static struct option core_opts[] = { { "no-color", 0, NULL, 'C' }, { "unit-load-path", 1, NULL, 'L' }, + { 
"num-threads", 1, NULL, 'j' }, { NULL, 0, NULL, 0 } }; -static const char *core_opts_str = "hvqCL:"; +static const char *core_opts_str = "hvqCL:j:"; void core_print_help(struct unit_fw *fw) { @@ -63,6 +64,8 @@ void core_print_help(struct unit_fw *fw) " corrupt that file.\n", " -L, --unit-load-path \n", " Path to where the unit test libraries reside.\n", +" -j, --num-threads \n", +" Number of threads to use while running all tests.\n", "\n", "Note: mandatory arguments to long arguments are mandatory for short\n", "arguments as well.\n", @@ -79,6 +82,7 @@ NULL static void set_arg_defaults(struct unit_fw_args *args) { args->unit_load_path = DEFAULT_ARG_UNIT_LOAD_PATH; + args->thread_count = 1; } /* @@ -121,6 +125,13 @@ int core_parse_args(struct unit_fw *fw, int argc, char **argv) case 'L': args->unit_load_path = optarg; break; + case 'j': + args->thread_count = strtol(optarg, NULL, 10); + if (args->thread_count == 0) { + core_err(fw, "Invalid number of threads\n"); + return -1; + } + break; case '?': args->help = true; return -1; diff --git a/userspace/src/exec.c b/userspace/src/exec.c index b9ba1336..8a99437b 100644 --- a/userspace/src/exec.c +++ b/userspace/src/exec.c @@ -21,6 +21,10 @@ */ #include +#include +#include +#include +#include #include #include @@ -30,20 +34,40 @@ #include +/* + * Semaphore to limit the number of threads + */ +sem_t unit_thread_semaphore; + +/* + * C11 thread local storage, used to access test context when a signal is + * received (ex: SIGSEGV) in a thread. + */ +_Thread_local struct unit_module *thread_local_module; +_Thread_local struct unit_module_test *thread_local_test; + /* * Execute a module and all its subtests. 
This function builds a gk20a for the * test to use by executing nvgpu_posix_probe() and nvgpu_posix_cleanup(); */ -static int core_exec_module(struct unit_fw *fw, - struct unit_module *module) +static void *core_exec_module(void *module_param) { unsigned int i; - struct gk20a *g = fw->nvgpu.nvgpu_posix_probe(); + struct unit_module *module = (struct unit_module *) module_param; + struct gk20a *g; + + g = module->fw->nvgpu.nvgpu_posix_probe(); + + if (!g) { + core_msg_color(module->fw, C_RED, + " nvgpu_posix_probe failed: Module %s\n", + module->name); + goto thread_exit; + } - if (!g) - return -1; + core_vbs(module->fw, 1, "Execing module: %s\n", module->name); - core_vbs(fw, 1, "Execing module: %s\n", module->name); + thread_local_module = module; /* * Execute each test within the module. No reinit is done between tests. @@ -53,21 +77,86 @@ static int core_exec_module(struct unit_fw *fw, for (i = 0; i < module->nr_tests; i++) { struct unit_module_test *t = module->tests + i; int test_status; + thread_local_test = t; - core_msg(fw, "Running %s.%s\n", module->name, t->name); + core_msg(module->fw, "Running %s.%s\n", module->name, + t->name); test_status = t->fn(module, g, t->args); if (test_status != UNIT_SUCCESS) - core_msg_color(fw, C_RED, + core_msg_color(module->fw, C_RED, " Unit error! Test %s.%s FAILED!\n", module->name, t->name); - core_add_test_record(fw, module, t, + core_add_test_record(module->fw, module, t, test_status == UNIT_SUCCESS); } - fw->nvgpu.nvgpu_posix_cleanup(g); + module->fw->nvgpu.nvgpu_posix_cleanup(g); + + core_vbs(module->fw, 1, "Module completed: %s\n", module->name); +thread_exit: + sem_post(&unit_thread_semaphore); + return NULL; +} + +/* + * According to POSIX, "Signals which are generated by some action attributable + * to a particular thread, such as a hardware fault, shall be generated for the + * thread that caused the signal to be generated." 
+ * This custom signal handler will be run from within the thread that caused the + * exception. Thanks to the context being saved in local thread storage, it is + * then trivial to report which test case failed, and then terminate the thread. + */ +static void thread_error_handler(int sig, siginfo_t *siginfo, void *context) +{ + core_msg_color(thread_local_module->fw, C_RED, + " Signal %d in Test: %s.%s!\n", sig, + thread_local_module->name, thread_local_test->name); + core_add_test_record(thread_local_module->fw, thread_local_module, + thread_local_test, false); + sem_post(&unit_thread_semaphore); + pthread_exit(NULL); +} + +/* + * Install a custom signal handler for several signals to be used when running + * in multithreaded environment. + */ +static int install_thread_error_handler(void) +{ + struct sigaction action; + int err; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = &thread_error_handler; + action.sa_flags = SA_SIGINFO; + /* SIGSEGV: Invalid memory reference */ + err = sigaction(SIGSEGV, &action, NULL); + if (err < 0) { + return err; + } + /* SIGILL: Illegal Instruction */ + err = sigaction(SIGILL, &action, NULL); + if (err < 0) { + return err; + } + /* SIGFPE: Floating-point exception */ + err = sigaction(SIGFPE, &action, NULL); + if (err < 0) { + return err; + } + /* SIGBUS: Bus error */ + err = sigaction(SIGBUS, &action, NULL); + if (err < 0) { + return err; + } + /* SIGSYS: Bad system call */ + err = sigaction(SIGSYS, &action, NULL); + if (err < 0) { + return err; + } return 0; } @@ -76,14 +165,39 @@ static int core_exec_module(struct unit_fw *fw, */ int core_exec(struct unit_fw *fw) { - int ret; struct unit_module **modules; + int err = 0; + + core_vbs(fw, 1, "Using %d threads\n", fw->args->thread_count); + sem_init(&unit_thread_semaphore, 0, fw->args->thread_count); + + /* + * If running single threaded, keep the default SIGSEGV handler to make + * interactive debugging easier, otherwise install the custom one. 
+ */ + if (fw->args->thread_count > 1) { + err = install_thread_error_handler(); + if (err != 0) { + core_msg_color(fw, C_RED, + " Failed to install signal handler!\n"); + return err; + } + } for (modules = fw->modules; *modules != NULL; modules++) { - ret = core_exec_module(fw, *modules); + if (fw->args->thread_count == 1) { + core_exec_module(*modules); + } else { + sem_wait(&unit_thread_semaphore); + pthread_create(&((*modules)->thread), NULL, + core_exec_module, (void *) *modules); + } + } - if (ret != 0) - return ret; + if (fw->args->thread_count > 1) { + for (modules = fw->modules; *modules != NULL; modules++) { + pthread_join((*modules)->thread, NULL); + } } return 0; diff --git a/userspace/src/results.c b/userspace/src/results.c index ae077b82..4c30c4db 100644 --- a/userspace/src/results.c +++ b/userspace/src/results.c @@ -22,12 +22,18 @@ #include #include +#include #include #include #include #include +/* + * Mutex to ensure core_add_test_record() is thread safe. + */ +pthread_mutex_t mutex_results = PTHREAD_MUTEX_INITIALIZER; + static int __init_results(struct unit_fw *fw) { struct unit_results *results; @@ -72,16 +78,22 @@ int core_add_test_record(struct unit_fw *fw, bool success) { struct unit_test_record *tr; + int err = 0; + pthread_mutex_lock(&mutex_results); /* - * Dones nothing if results are already inited. + * Does nothing if results are already inited. 
*/ - if (__init_results(fw) != 0) - return -1; + if (__init_results(fw) != 0) { + err = -1; + goto done; + } tr = malloc(sizeof(*tr)); - if (tr == NULL) - return -1; + if (tr == NULL) { + err = -1; + goto done; + } tr->mod = mod; tr->test = test; @@ -97,7 +109,9 @@ int core_add_test_record(struct unit_fw *fw, if (success) fw->results->nr_passing += 1; - return 0; +done: + pthread_mutex_unlock(&mutex_results); + return err; } void core_print_test_status(struct unit_fw *fw) diff --git a/userspace/src/unit_main.c b/userspace/src/unit_main.c index 31c31d50..64344bf0 100644 --- a/userspace/src/unit_main.c +++ b/userspace/src/unit_main.c @@ -72,5 +72,13 @@ int main(int argc, char **argv) core_print_test_status(fw); + if (fw->results->nr_tests == 0) { + /* No tests were run */ + return -1; + } else if ((fw->results->nr_tests - fw->results->nr_passing) != 0) { + /* Some tests failed */ + return -1; + } + return 0; } -- cgit v1.2.2