/** * Copyright 2019 Sims Hill Osborne and Joshua Bakita * * This header provides facilities by which to separably run and time TACLeBench **/ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include // This is only visible if _GNU_SOURCE is defined, and that define does not // come along to places where this file is included. Address this by manually // forcing it into the global namespace. extern int sched_getcpu(); // These constants correspond to the imx6q-sabredb platform #define LINE_SIZE 32 #define L2_SIZE 16*2048*32 #if __arm__ #include #include #endif #define LITMUS 0 #define MC2 0 #define MMDC_PROF 0 #if LITMUS #include #endif #if MMDC_PROF #include "/media/speedy/litmus/tools/mmdc/mmdc.h" #endif #if LITMUS #define SET_UP LOAD_PARAMS SETUP_LITMUS #else #define SET_UP LOAD_PARAMS #endif #if MMDC_PROF #define LOAD_PARAMS LOAD_PARAMS_ITRL SETUP_MMDC #else #define LOAD_PARAMS LOAD_PARAMS_ITRL #endif // Store state globally so that the job can be outside main() // Arrays use float as a comprimise between overflow and size float *_rt_exec_time; #if MMDC_PERF float *_rt_mmdc_read; float *_rt_mmdc_write; #endif long _rt_jobs_complete; long _rt_max_jobs; int _rt_core; int _rt_will_output; struct timespec _rt_start, _rt_end; char *_rt_run_id; char *_rt_our_prog_name; #define _RT_FILENAME_LEN 64 #define LOAD_PARAMS_ITRL \ if (argc != 6) { \ fprintf(stderr, "Usage: %s \n", argv[0]);\ fprintf(stderr, " integer number of iterations. -1 for infitite.\n");\ fprintf(stderr, " 1 to save results, 0 to discard.\n");\ fprintf(stderr, " UNUSED. Core is now auto-detected.\n");\ exit(1);\ }\ _rt_our_prog_name = argv[1];\ _rt_max_jobs = atol(argv[2]);\ _rt_core = sched_getcpu();\ _rt_run_id = argv[4];\ _rt_will_output = atoi(argv[5]);\ if (_rt_max_jobs < 0 && _rt_will_output != 0) {\ fprintf(stderr, "Infinite loops only supported when _rt_will_output is disabled!\n");\ exit(1);\ }\ if (strlen(_rt_run_id) + 5 > _RT_FILENAME_LEN) {\ fprintf(stderr, "Run ID is too large! Keep it to less than %d characters.\n", _RT_FILENAME_LEN);\ exit(1);\ }\ _rt_exec_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));\ if (!_rt_exec_time) {\ perror("Unable to allocate buffer for execution times");\ exit(1);\ }\ _rt_jobs_complete = 0;\ mlockall(MCL_CURRENT || MCL_FUTURE); #define SETUP_MMDC \ _rt_mmdc_read = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));\ _rt_mmdc_write = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));\ if (!_rt_mmdc_read || !_rt_mmdc_write) {\ perror("Unable to allocate buffer for MMDC data");\ exit(1);\ }\ MMDC_PROFILE_RES_t mmdc_res;\ memset(&mmdc_res, 0, sizeof(MMDC_PROFILE_RES_t));\ int fd = open("/dev/mem", O_RDWR, 0);\ if (fd < 0) {\ perror("Unable to open /dev/mem");\ exit(1);\ }\ pMMDC_t mmdc = mmap(NULL, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMDC_P0_IPS_BASE_ADDR);\ if (mmdc == MAP_FAILED) {\ perror("Unable to map MMDC address space");\ exit(1);\ }\ mmdc->madpcr1 = axi_arm1;\ msync(&(mmdc->madpcr1),4,MS_SYNC); #define SETUP_LITMUS \ unsigned int wait = 0; \ if (be_migrate_to_domain(_rt_core) < 0) { \ perror("Unable to migrate to specified CPU"); \ exit(1); \ } \ struct reservation_config res; \ res.id = gettid(); \ res.cpu = cpu; \ res.priority = LITMUS_HIGHEST_PRIORITY; \ /* we take over half the CPU time (these are ns) */ \ res.polling_params.budget = ms2ns(3000); \ res.polling_params.period = ms2ns(3000); \ res.polling_params.offset = 0; \ res.polling_params.relative_deadline = ms2ns(3000); \ /* Not 100% sure that we should use periodic polling */ \ if (reservation_create(PERIODIC_POLLING, &res) < 0) { \ perror("Unable to create reservation"); \ exit(1); \ } \ struct rt_task rt_param; \ init_rt_task_param(&rt_param); \ /* Supposedly the next two parameters are irrelevant when reservations are enabled, but I'm leaving them anyway... */ \ rt_param.exec_cost = ms2ns(999); \ rt_param.period = ms2ns(1000); \ rt_param.priority = LITMUS_HIGHEST_PRIORITY; \ rt_param.cls = RT_CLASS_HARD; \ rt_param.release_policy = TASK_PERIODIC; \ rt_param.budget_policy = NO_ENFORCEMENT; \ rt_param.cpu = cpu; \ if (set_rt_task_param(gettid(), &rt_param) < 0) { \ perror("Unable to set real-time parameters"); \ exit(1); \ } \ if (init_litmus() != 0) { \ perror("init_litmus failed"); \ exit(1); \ } \ MC2_SETUP \ if (task_mode(LITMUS_RT_TASK) != 0) { \ perror("Unable to become real-time task"); \ exit(1); \ } \ if (wait && wait_for_ts_release() != 0) { \ perror("Unable to wait for taskset release"); \ exit(1); \ } #if MC2 #define MC2_SETUP \ struct mc2_task mc2_param; \ mc2_param.res_id = gettid(); \ mc2_param.crit = CRIT_LEVEL_A; \ if (set_mc2_task_param(gettid(), &mc2_param) < 0) { \ perror("Unable to set MC^2 task params"); \ exit(1); \ } \ set_page_color(rt_param.cpu); #else #define MC2_SETUP #endif #define CLEANUP_LITMUS \ if (task_mode(BACKGROUND_TASK) != 0) { \ perror("Unable to become a real-time task"); \ exit(1); \ } \ reservation_destroy(gettid(), rt_param.cpu); #if __arm__ // On ARM, manually flush the cache #define FLUSH_CACHES \ volatile uint8_t buffer[L2_SIZE * 4]; \ for (uint32_t j = 0; j < 4; j++) \ for (uint32_t i = 0; i < L2_SIZE * 4; i += LINE_SIZE) \ buffer[i]++; #else // On x86 call the wbinvld instruction (it's in a kernel module due to it being ring-0) #define FLUSH_CACHES \ FILE *fp = fopen("/proc/wbinvd", "r");\ if (fp == NULL) {\ perror("Cache flush module interface cannot be opened");\ exit(1);\ }\ char dummy;\ if (fread(&dummy, 1, 1, fp) == 0) {\ perror("Unable to access cache flush module interface");\ exit(1);\ }\ fclose(fp); #endif // Buffer timing result from a single job static void _rt_save_job_result() { if (_rt_jobs_complete >= _rt_max_jobs) { fprintf(stderr, "Max jobs setting too small! Trying to record job #%ld when we only have space for %ld jobs. Exiting...\n", _rt_jobs_complete, _rt_max_jobs); exit(1); } if (_rt_jobs_complete > -1 && _rt_will_output) { _rt_exec_time[_rt_jobs_complete] = _rt_end.tv_sec - _rt_start.tv_sec; _rt_exec_time[_rt_jobs_complete] *= 1e9; _rt_exec_time[_rt_jobs_complete] += _rt_end.tv_nsec - _rt_start.tv_nsec; #if MMDC_PROF _rt_mmdc_read[_rt_jobs_complete] = mmdc_res.read_bytes; _rt_mmdc_write[_rt_jobs_complete] = mmdc_res.write_bytes; #endif } } // Save all buffered timing results to disk static void _rt_write_to_file() { char fileName[_RT_FILENAME_LEN]; FILE *fp; if (!_rt_will_output) return; munlockall(); strcpy(fileName, _rt_run_id); strcat(fileName, ".txt"); fp = fopen(fileName, "a"); if (fp == NULL) { perror("Unable to open _rt_will_output file"); exit(1); } // Same format as the paired results with "none" for unused fields for (int i = 0; i < _rt_jobs_complete; i++){ fprintf(fp, "%s none %u none %ld %.f %s %d %.f %.f \n", _rt_our_prog_name, _rt_core, _rt_max_jobs, _rt_exec_time[i], _rt_run_id, i, #if MMDC_PROF _rt_mmdc_read[i], _rt_mmdc_write[i]); #else 0.0, 0.0); #endif } fclose(fp); #if LITMUS CLEANUP_LITMUS #endif } // Start a job static void _rt_start_loop() { #if LITMUS if (sleep_next_period() != 0) { perror("Unable to sleep for next period"); } #else sched_yield(); #endif FLUSH_CACHES #if MMDC_PROF /* This disables profiling, resets the counters, clears the overflow bit, and enables profiling */ start_mmdc_profiling(mmdc); #endif clock_gettime(CLOCK_MONOTONIC, &_rt_start); } // Complete a job static void _rt_stop_loop() { clock_gettime(CLOCK_MONOTONIC, &_rt_end); #if MMDC_PROF /* This freezes the profiling and makes results available */ pause_mmdc_profiling(mmdc); get_mmdc_profiling_results(mmdc, &mmdc_res); #endif _rt_save_job_result(); _rt_jobs_complete++; } /****** New API ****** * Intended structure: * * |int main(int argc, char **argv) { * | SET_UP * | ... * | for_each_job { * | tacleInit(); * | tacleMain(); * | } * | WRITE_TO_FILE * |} * * The main() function must call its parameters argc and argv for SET_UP to be * able to read them. * Only SET_UP necessarily has to be in main(). * * We use some niche C features, here's a quick explaination: * 1. The && operator doesn't evaluate the right-hand side of the expression * unless the left side evaluated to true. We use this to only execute * _rt_start_loop() when the loop will actually run. * 2. The comma operator executes the first expression and then throws away the * result. We use this to call our void function from inside a comparison. */ #define for_each_job \ for (; _rt_jobs_complete < _rt_max_jobs && (_rt_start_loop(),1); \ _rt_stop_loop()) /****** Legacy API ****** * Intended structure: * * |int main(int argc, char **argv) { * | SET_UP * | for (jobsComplete=0; jobsComplete