/** * Copyright 2019 Sims Hill Osborne and 2020 Joshua Bakita * * This header provides facilities by which to separably run and time TACLeBench * To use this for paired task timing, define PAIRED (pass CFLAGS=-DPAIRED to make) **/ #define _GNU_SOURCE #include // For O_CREAT and O_RDWR #include // For sched_yield() #include // For sem_{open, post, wait}() #include #include // For exit() #include // For strlen() #include // For mlockall() #include // For ftruncate() #include // This is only visible if _GNU_SOURCE is defined, and that define does not // come along to places where this file is included. Address this by manually // forcing it into the global namespace. extern int sched_getcpu(); // These constants correspond to the imx6q-sabredb platform #define LINE_SIZE 32 #define L2_SIZE 16*2048*32 #if __arm__ #include #include #endif #define LITMUS 1 #define MC2 0 #define MMDC_PROF 0 #if LITMUS #include #endif #if MMDC_PROF #include "/media/speedy/litmus/tools/mmdc/mmdc.h" #endif #if LITMUS #define SET_UP LOAD_PARAMS SETUP_LITMUS #else #define SET_UP LOAD_PARAMS #endif #if MMDC_PROF #define LOAD_PARAMS LOAD_PARAMS_ITRL SETUP_MMDC #else #define LOAD_PARAMS LOAD_PARAMS_ITRL #endif // Store state globally so that the job can be outside main() // Arrays use float as a comprimise between overflow and size // Paired arrays use long longs as precision is more important for those times #ifdef PAIRED long long *_rt_start_time; long long *_rt_end_time; #else float *_rt_exec_time; #endif #if MMDC_PERF float *_rt_mmdc_read; float *_rt_mmdc_write; #endif long _rt_jobs_complete; long _rt_max_jobs; int _rt_core; int _rt_will_output; struct timespec _rt_start, _rt_end; char *_rt_run_id; char *_rt_our_prog_name; char *_rt_other_prog_name; char *_rt_other_core; #define _RT_FILENAME_LEN 64 #define _BILLION (1000*1000*1000) #ifdef PAIRED char *_rt_barrier; sem_t *_rt_first_sem, *_rt_second_sem; int _rt_lock_id; #endif static void _rt_load_params_itrl(int argc, char **argv) { #ifdef PAIRED if (argc != 8) { fprintf(stderr, "Usage: %s ", argv[0]); fprintf(stderr, " string for logging. Name of this task.\n"); fprintf(stderr, " integer number of iterations. -1 for infinite.\n"); fprintf(stderr, " UNUSED. Core is now auto-detected.\n"); fprintf(stderr, " integer for logging. Core of paired task.\n"); fprintf(stderr, " string for logging. Name of paired task.\n"); fprintf(stderr, " string to append with .txt to yield output file name.\n"); fprintf(stderr, " 1 to indicate this is pair member 1, otherwise pair member 2.\n"); exit(1); } #else if (argc != 6) { fprintf(stderr, "Usage: %s \n", argv[0]); fprintf(stderr, " string for logging. Name of this task.\n"); fprintf(stderr, " integer number of iterations. -1 for infinite.\n"); fprintf(stderr, " UNUSED. Core is now auto-detected.\n"); fprintf(stderr, " string to append with .txt to yield output file name.\n"); fprintf(stderr, " 1 to save results, 0 to discard.\n"); exit(1); } #endif _rt_our_prog_name = argv[1]; _rt_max_jobs = atol(argv[2]); _rt_core = sched_getcpu(); #ifdef PAIRED _rt_other_core = argv[4]; _rt_other_prog_name = argv[5]; _rt_run_id = argv[6]; _rt_lock_id = atoi(argv[7]); // The paired version doesn't support disabling output (legacy compatibility) _rt_will_output = 1; #else _rt_other_core = "none"; _rt_other_prog_name = "none"; _rt_run_id = argv[4]; _rt_will_output = atoi(argv[5]); #endif /* PAIRED */ if (_rt_max_jobs < 0 && _rt_will_output != 0) { fprintf(stderr, "Infinite loops only supported when _rt_will_output is disabled!\n"); exit(1); } if (strlen(_rt_run_id) + 5 > _RT_FILENAME_LEN) { fprintf(stderr, "Run ID is too large! Keep it to less than %d characters.\n", _RT_FILENAME_LEN); exit(1); } #ifdef PAIRED _rt_start_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(long long)); _rt_end_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(long long)); if (!_rt_end_time || !_rt_start_time) { perror("Unable to allocate buffers for execution times"); exit(1); } _rt_first_sem = sem_open("/_libextra_first_sem", O_CREAT, 644, 0); _rt_second_sem = sem_open("/_libextra_second_sem", O_CREAT, 644, 0); if (_rt_first_sem == SEM_FAILED || _rt_second_sem == SEM_FAILED) { perror("Error while creating semaphores"); exit(1); } int barrier_file = shm_open("/_libextra_barrier", O_CREAT | O_RDWR, 644); if (barrier_file == -1) { perror("Error while creating shared memory for barrier synchronization"); exit(1); } if (ftruncate(barrier_file, 1) == -1) { perror("Error while setting size of shared memory for barrier synchronization"); exit(1); } _rt_barrier = mmap(NULL, 1, PROT_WRITE, MAP_SHARED, barrier_file, 0); if (_rt_barrier == MAP_FAILED) { perror("Error while mapping shared memory for barrier synchronization"); exit(1); } *_rt_barrier = 0; #else _rt_exec_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(float)); if (!_rt_exec_time) { perror("Unable to allocate buffer for execution times"); exit(1); } #endif /* PAIRED */ _rt_jobs_complete = 0; mlockall(MCL_CURRENT || MCL_FUTURE); } #define LOAD_PARAMS_ITRL _rt_load_params_itrl(argc, argv); #define SETUP_MMDC \ _rt_mmdc_read = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));\ _rt_mmdc_write = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));\ if (!_rt_mmdc_read || !_rt_mmdc_write) {\ perror("Unable to allocate buffer for MMDC data");\ exit(1);\ }\ MMDC_PROFILE_RES_t mmdc_res;\ memset(&mmdc_res, 0, sizeof(MMDC_PROFILE_RES_t));\ int fd = open("/dev/mem", O_RDWR, 0);\ if (fd < 0) {\ perror("Unable to open /dev/mem");\ exit(1);\ }\ pMMDC_t mmdc = mmap(NULL, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMDC_P0_IPS_BASE_ADDR);\ if (mmdc == MAP_FAILED) {\ perror("Unable to map MMDC address space");\ exit(1);\ }\ mmdc->madpcr1 = axi_arm1;\ msync(&(mmdc->madpcr1),4,MS_SYNC); #define SETUP_LITMUS \ unsigned int wait = 0; \ if (be_migrate_to_domain(_rt_core) < 0) { \ perror("Unable to migrate to specified CPU"); \ exit(1); \ } \ struct rt_task rt_param; \ init_rt_task_param(&rt_param); \ /* Supposedly the next two parameters are irrelevant when reservations are enabled, but I'm leaving them anyway... */ \ rt_param.exec_cost = ms2ns(999); \ rt_param.period = ms2ns(1000); \ rt_param.priority = LITMUS_HIGHEST_PRIORITY; \ rt_param.cls = RT_CLASS_HARD; \ rt_param.release_policy = TASK_PERIODIC; \ rt_param.budget_policy = NO_ENFORCEMENT; \ rt_param.cpu = _rt_core; \ if (set_rt_task_param(gettid(), &rt_param) < 0) { \ perror("Unable to set real-time parameters"); \ exit(1); \ } \ if (init_litmus() != 0) { \ perror("init_litmus failed"); \ exit(1); \ } \ MC2_SETUP \ if (task_mode(LITMUS_RT_TASK) != 0) { \ perror("Unable to become real-time task"); \ exit(1); \ } \ if (wait && wait_for_ts_release() != 0) { \ perror("Unable to wait for taskset release"); \ exit(1); \ } #if MC2 #define MC2_SETUP \ set_page_color(rt_param.cpu); #else #define MC2_SETUP #endif #define CLEANUP_LITMUS \ if (task_mode(BACKGROUND_TASK) != 0) { \ perror("Unable to become a real-time task"); \ exit(1); \ } \ #if __arm__ // On ARM, manually flush the cache #define FLUSH_CACHES \ volatile uint8_t buffer[L2_SIZE * 4]; \ for (uint32_t j = 0; j < 4; j++) \ for (uint32_t i = 0; i < L2_SIZE * 4; i += LINE_SIZE) \ buffer[i]++; #else // On x86 call the wbinvld instruction (it's in a kernel module due to it being ring-0) #define FLUSH_CACHES \ FILE *fp = fopen("/proc/wbinvd", "r");\ if (fp == NULL) {\ perror("Cache flush module interface cannot be opened");\ exit(1);\ }\ char dummy;\ if (fread(&dummy, 1, 1, fp) == 0) {\ perror("Unable to access cache flush module interface");\ exit(1);\ }\ fclose(fp); #endif // This semaphore-based synchronization is from Sims #define FIRST_UNLOCK \ if (_rt_lock_id == 1) {\ if (sem_post(_rt_second_sem) != 0) {\ perror("Unable to unlock second semaphore");\ exit(1);\ }\ } \ else {\ if (sem_post(_rt_first_sem) != 0) {\ perror("Unable to unlock first semaphore");\ exit(1);\ }\ } \ #define FIRST_LOCK \ if (_rt_lock_id == 1) {\ if (sem_wait(_rt_first_sem) != 0) {\ perror("Unable to wait on first semaphore");\ exit(1);\ }\ }\ else {\ if (sem_wait(_rt_second_sem) != 0) {\ perror("Unable to wait on second semaphore");\ exit(1);\ }\ } // This ensures a very low difference between pair member start times #define BARRIER_SYNC \ if (__sync_bool_compare_and_swap(_rt_barrier, 0, 1)) {\ while (!__sync_bool_compare_and_swap(_rt_barrier, 0, 0)) {};\ }\ else {\ __sync_bool_compare_and_swap(_rt_barrier, 1, 0);\ } // Buffer timing result from a single job static void _rt_save_job_result() { if (_rt_jobs_complete >= _rt_max_jobs) { fprintf(stderr, "Max jobs setting too small! Trying to record job #%ld when we only have space for %ld jobs. Exiting...\n", _rt_jobs_complete, _rt_max_jobs); exit(1); } if (_rt_jobs_complete > -1 && _rt_will_output) { #ifdef PAIRED _rt_start_time[_rt_jobs_complete] = _rt_start.tv_sec; _rt_start_time[_rt_jobs_complete] *= _BILLION; _rt_start_time[_rt_jobs_complete] += _rt_start.tv_nsec; _rt_end_time[_rt_jobs_complete] = _rt_end.tv_sec; _rt_end_time[_rt_jobs_complete] *= _BILLION; _rt_end_time[_rt_jobs_complete] += _rt_end.tv_nsec; #else _rt_exec_time[_rt_jobs_complete] = _rt_end.tv_sec - _rt_start.tv_sec; _rt_exec_time[_rt_jobs_complete] *= _BILLION; _rt_exec_time[_rt_jobs_complete] += _rt_end.tv_nsec - _rt_start.tv_nsec; #endif /* PAIRED */ #if MMDC_PROF _rt_mmdc_read[_rt_jobs_complete] = mmdc_res.read_bytes; _rt_mmdc_write[_rt_jobs_complete] = mmdc_res.write_bytes; #endif } } // Save all buffered timing results to disk static void _rt_write_to_file() { char fileName[_RT_FILENAME_LEN]; FILE *fp; munlockall(); if (!_rt_will_output) goto out; strcpy(fileName, _rt_run_id); strcat(fileName, ".txt"); fp = fopen(fileName, "a"); if (fp == NULL) { perror("Unable to open output file"); exit(1); } // Baseline output uses a similar format with "none" for unused fields for (int i = 0; i < _rt_jobs_complete; i++){ fprintf(fp, "%s %s %u %s %ld", _rt_our_prog_name, _rt_other_prog_name, _rt_core, _rt_other_core, _rt_max_jobs); #ifdef PAIRED // For unclear legacy reasons, paired tasks emit sec and ns separately fprintf(fp, " %lld %lld %lld %lld", _rt_start_time[i] / _BILLION, _rt_start_time[i] % _BILLION, _rt_end_time[i] / _BILLION, _rt_end_time[i] % _BILLION); #else fprintf(fp, " %.f", _rt_exec_time[i]); #endif /* PAIRED */ fprintf(fp, " %s %d %.f %.f\n", _rt_run_id, i, #if MMDC_PROF _rt_mmdc_read[i], _rt_mmdc_write[i]); #else 0.0, 0.0); #endif /* MMDC_PROF */ } fclose(fp); out: #if LITMUS CLEANUP_LITMUS #endif /* LITMUS */ #ifdef PAIRED munmap(_rt_barrier, 1); shm_unlink("/_libextra_barrier"); sem_unlink("/_libextra_first_sem"); sem_unlink("/_libextra_second_sem"); free(_rt_start_time); free(_rt_end_time); #else free(_rt_exec_time); #endif /* PAIRED */ #if MMDC_PROF free(_rt_mmdc_read); free(_rt_mmdc_write); #endif /* MMDC_PROF */ } // Start a job static void _rt_start_loop() { #if LITMUS if (sleep_next_period() != 0) { perror("Unable to sleep for next period"); } #else sched_yield(); #endif /* LITMUS */ #ifdef PAIRED FIRST_UNLOCK FIRST_LOCK #endif /* PAIRED */ FLUSH_CACHES #ifdef PAIRED BARRIER_SYNC #endif /* PAIRED */ #if MMDC_PROF /* This disables profiling, resets the counters, clears the overflow bit, and enables profiling */ start_mmdc_profiling(mmdc); #endif /* MMDC_PROF */ clock_gettime(CLOCK_MONOTONIC, &_rt_start); } // Complete a job static void _rt_stop_loop() { clock_gettime(CLOCK_MONOTONIC, &_rt_end); #if MMDC_PROF /* This freezes the profiling and makes results available */ pause_mmdc_profiling(mmdc); get_mmdc_profiling_results(mmdc, &mmdc_res); #endif /* MMDC_PROF */ _rt_save_job_result(); _rt_jobs_complete++; } /****** New API ****** * Intended structure: * * |int main(int argc, char **argv) { * | SET_UP * | ... * | for_each_job { * | tacleInit(); * | tacleMain(); * | } * | WRITE_TO_FILE * |} * * The main() function must call its parameters argc and argv for SET_UP to be * able to read them. * Only SET_UP necessarily has to be in main(). * * We use some niche C features, here's a quick explaination: * 1. The && operator doesn't evaluate the right-hand side of the expression * unless the left side evaluated to true. We use this to only execute * _rt_start_loop() when the loop will actually run. * 2. The comma operator executes the first expression and then throws away the * result. We use this to call our void function from inside a comparison. */ #define for_each_job \ for (; _rt_jobs_complete < _rt_max_jobs && (_rt_start_loop(),1); \ _rt_stop_loop()) /****** Legacy API ****** * Intended structure: * * |int main(int argc, char **argv) { * | SET_UP * | for (jobsComplete=0; jobsComplete