summaryrefslogtreecommitdiffstats
path: root/extra.h
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2020-10-19 01:09:53 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2020-10-19 01:09:53 -0400
commita71fc97fd262e1b5770f827047ea60bbaf38d9a2 (patch)
treeb45ef48c63a35817f2db93dd2fec718778f58b99 /extra.h
parent41358857592f1908d0c0f9898b6c9acabc1ad161 (diff)
Unify all the versions of extra.h into a single multipurpose header
There was previously a huge amount of shared code that had to be copied back and forth. This should reduce the maintenance burden by containing all future changes to a single file. The new unified library is fully backwards-compatible, but it also introduces the easy-to-use `for_each_job` macro, which replaces the specific `for(...) START_LOOP ... STOP_LOOP` format requirement and is generally much harder to abuse. The new unified library also automatically cleans up its shared memory and semaphores, so this commit also removes the separate `cleanupSemaphores` binary. I also found a precursor of `extra.h` written by Sims in `litmusStuff.h`. This code is only interesting for historical purposes, so it is also removed in this commit. This commit also adds debug options to all the Makefiles and silences rm's complaints about non-existent files in make clean.
Diffstat (limited to 'extra.h')
-rw-r--r--extra.h500
1 files changed, 500 insertions, 0 deletions
diff --git a/extra.h b/extra.h
new file mode 100644
index 0000000..3215951
--- /dev/null
+++ b/extra.h
@@ -0,0 +1,500 @@
1/**
2 * Copyright 2019 Sims Hill Osborne and Joshua Bakita
3 *
4 * This header provides facilities by which to separably run and time TACLeBench
5 * To use this for paired task timing, define PAIRED (pass CFLAGS=-DPAIRED to make)
6 **/
#define _GNU_SOURCE
#include <fcntl.h> // For O_CREAT and O_RDWR
#include <limits.h> // For LONG_MAX
#include <sched.h> // For sched_yield()
#include <semaphore.h> // For sem_{open, post, wait}()
#include <stdint.h> // For uint8_t and uint32_t
#include <stdio.h>
#include <stdlib.h> // For exit()
#include <string.h> // For strlen()
#include <sys/mman.h> // For mlockall()
#include <time.h>
#include <unistd.h> // For ftruncate()
17
// This is only visible if _GNU_SOURCE is defined, and that define does not
// come along to places where this file is included. Address this by manually
// forcing it into the global namespace. Declared with an explicit (void) so
// that this is a real prototype rather than an unspecified-parameter
// declaration.
extern int sched_getcpu(void);
22
// These constants correspond to the imx6q-sabredb platform
#define LINE_SIZE 32
// Parenthesized so that the product stays intact when the macro is embedded
// in a larger expression (e.g. `x % L2_SIZE` or `x / L2_SIZE`).
#define L2_SIZE (16*2048*32)
26
#if __arm__
#include <unistd.h>
#include <sys/syscall.h>
#endif

// Build-time feature switches. Each one defaults to 0 (disabled), but can be
// enabled from the compiler command line (e.g. CFLAGS=-DLITMUS=1) without
// editing this file; the #ifndef guards keep such a definition from being
// overridden here.
#ifndef LITMUS
#define LITMUS 0
#endif
#ifndef MC2
#define MC2 0
#endif
#ifndef MMDC_PROF
#define MMDC_PROF 0
#endif

#if LITMUS
#include <litmus.h>
#endif

#if MMDC_PROF
#include "/media/speedy/litmus/tools/mmdc/mmdc.h"
#endif

// SET_UP is the single entry point placed at the top of main(): it always
// loads the parameters and additionally performs LITMUS setup when enabled.
#if LITMUS
#define SET_UP LOAD_PARAMS SETUP_LITMUS
#else
#define SET_UP LOAD_PARAMS
#endif

// LOAD_PARAMS parses the command line and, when MMDC profiling is enabled,
// also sets up the MMDC profiler.
#if MMDC_PROF
#define LOAD_PARAMS LOAD_PARAMS_ITRL SETUP_MMDC
#else
#define LOAD_PARAMS LOAD_PARAMS_ITRL
#endif
55
// Store state globally so that the job can be outside main()
// Arrays use float as a compromise between overflow and size
// Paired arrays use long longs as precision is more important for those times
#ifdef PAIRED
long long *_rt_start_time; // Per-job start timestamp, in nanoseconds
long long *_rt_end_time; // Per-job end timestamp, in nanoseconds
#else
float *_rt_exec_time; // Per-job execution time, in nanoseconds
#endif
// Guarded by MMDC_PROF, the same switch that guards every use of these buffers
#if MMDC_PROF
float *_rt_mmdc_read;
float *_rt_mmdc_write;
#endif
long _rt_jobs_complete; // Number of jobs recorded so far
long _rt_max_jobs; // Number of jobs requested on the command line
int _rt_core; // Core we were running on when parameters were loaded
int _rt_will_output; // Non-zero if results are buffered and written to disk
struct timespec _rt_start, _rt_end; // Timestamps bracketing the current job

char *_rt_run_id;
char *_rt_our_prog_name;
char *_rt_other_prog_name;
char *_rt_other_core;
#define _RT_FILENAME_LEN 64
#define _BILLION (1000*1000*1000)
#ifdef PAIRED
char *_rt_barrier; // One shared byte used by BARRIER_SYNC
sem_t *_rt_first_sem, *_rt_second_sem;
int _rt_lock_id; // 1 for pair member 1, anything else for pair member 2
#endif
86
87static void _rt_load_params_itrl(int argc, char **argv) {
88#ifdef PAIRED
89 if (argc != 8) {
90 fprintf(stderr, "Usage: %s <name> <loops> <my core> <other core> <other name> <runID> <lockID>", argv[0]);
91 fprintf(stderr, " <name> string for logging. Name of this task.\n");
92 fprintf(stderr, " <loops> integer number of iterations. -1 for infinite.\n");
93 fprintf(stderr, " <my core> UNUSED. Core is now auto-detected.\n");
94 fprintf(stderr, " <other core> integer for logging. Core of paired task.\n");
95 fprintf(stderr, " <other name> string for logging. Name of paired task.\n");
96 fprintf(stderr, " <runID> string to append with .txt to yield output file name.\n");
97 fprintf(stderr, " <lockID> 1 to indicate this is pair member 1, otherwise pair member 2.\n");
98 exit(1);
99 }
100#else
101 if (argc != 6) {
102 fprintf(stderr, "Usage: %s <name> <loops> <my core> <runID> <save results?>\n", argv[0]);
103 fprintf(stderr, " <name> string for logging. Name of this task.\n");
104 fprintf(stderr, " <loops> integer number of iterations. -1 for infinite.\n");
105 fprintf(stderr, " <my core> UNUSED. Core is now auto-detected.\n");
106 fprintf(stderr, " <runID> string to append with .txt to yield output file name.\n");
107 fprintf(stderr, " <save results?> 1 to save results, 0 to discard.\n");
108 exit(1);
109 }
110#endif
111 _rt_our_prog_name = argv[1];
112 _rt_max_jobs = atol(argv[2]);
113 _rt_core = sched_getcpu();
114#ifdef PAIRED
115 _rt_other_core = argv[4];
116 _rt_other_prog_name = argv[5];
117 _rt_run_id = argv[6];
118 _rt_lock_id = atoi(argv[7]);
119 // The paired version doesn't support disabling output (legacy compatibility)
120 _rt_will_output = 1;
121#else
122 _rt_other_core = "none";
123 _rt_other_prog_name = "none";
124 _rt_run_id = argv[4];
125 _rt_will_output = atoi(argv[5]);
126#endif /* PAIRED */
127 if (_rt_max_jobs < 0 && _rt_will_output != 0) {
128 fprintf(stderr, "Infinite loops only supported when _rt_will_output is disabled!\n");
129 exit(1);
130 }
131 if (strlen(_rt_run_id) + 5 > _RT_FILENAME_LEN) {
132 fprintf(stderr, "Run ID is too large! Keep it to less than %d characters.\n", _RT_FILENAME_LEN);
133 exit(1);
134 }
135#ifdef PAIRED
136 _rt_start_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(long long));
137 _rt_end_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(long long));
138 if (!_rt_end_time || !_rt_start_time) {
139 perror("Unable to allocate buffers for execution times");
140 exit(1);
141 }
142 _rt_first_sem = sem_open("/_libextra_first_sem", O_CREAT, 644, 0);
143 _rt_second_sem = sem_open("/_libextra_second_sem", O_CREAT, 644, 0);
144 if (_rt_first_sem == SEM_FAILED || _rt_second_sem == SEM_FAILED) {
145 perror("Error while creating semaphores");
146 exit(1);
147 }
148 int barrier_file = shm_open("/_libextra_barrier", O_CREAT | O_RDWR, 644);
149 if (barrier_file == -1) {
150 perror("Error while creating shared memory for barrier synchronization");
151 exit(1);
152 }
153 if (ftruncate(barrier_file, 1) == -1) {
154 perror("Error while setting size of shared memory for barrier synchronization");
155 exit(1);
156 }
157 _rt_barrier = mmap(NULL, 1, PROT_WRITE, MAP_SHARED, barrier_file, 0);
158 if (_rt_barrier == MAP_FAILED) {
159 perror("Error while mapping shared memory for barrier synchronization");
160 exit(1);
161 }
162 *_rt_barrier = 0;
163#else
164 _rt_exec_time = calloc(_rt_max_jobs * _rt_will_output, sizeof(float));
165 if (!_rt_exec_time) {
166 perror("Unable to allocate buffer for execution times");
167 exit(1);
168 }
169#endif /* PAIRED */
170 _rt_jobs_complete = 0;
171 mlockall(MCL_CURRENT || MCL_FUTURE);
172}
173#define LOAD_PARAMS_ITRL _rt_load_params_itrl(argc, argv);
174
/* MMDC profiling setup, expanded by LOAD_PARAMS when MMDC_PROF is enabled.
 * Allocates the per-job read/write buffers, maps 0x4000 bytes of /dev/mem at
 * MMDC_P0_IPS_BASE_ADDR, and programs the madpcr1 register with axi_arm1.
 * Prints a diagnostic and exits on any failure.
 * Declares `mmdc_res`, `fd`, and `mmdc` in the scope where it is expanded.
 * NOTE(review): other functions in this header refer to `mmdc` and
 * `mmdc_res` by name; confirm those names are visible there. */
#define SETUP_MMDC \
	_rt_mmdc_read = calloc(_rt_max_jobs * _rt_will_output, sizeof(float)); \
	_rt_mmdc_write = calloc(_rt_max_jobs * _rt_will_output, sizeof(float)); \
	if (!(_rt_mmdc_read && _rt_mmdc_write)) { \
		perror("Unable to allocate buffer for MMDC data"); \
		exit(1); \
	} \
	MMDC_PROFILE_RES_t mmdc_res; \
	memset(&mmdc_res, 0, sizeof(mmdc_res)); \
	int fd = open("/dev/mem", O_RDWR, 0); \
	if (fd < 0) { \
		perror("Unable to open /dev/mem"); \
		exit(1); \
	} \
	pMMDC_t mmdc = mmap(NULL, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, MMDC_P0_IPS_BASE_ADDR); \
	if (mmdc == MAP_FAILED) { \
		perror("Unable to map MMDC address space"); \
		exit(1); \
	} \
	mmdc->madpcr1 = axi_arm1; \
	msync(&mmdc->madpcr1, 4, MS_SYNC);
196
/* LITMUS^RT setup, expanded inside main() by SET_UP when LITMUS is enabled.
 * In order: migrate to the detected core, create a periodic-polling
 * reservation keyed by our thread ID, register the real-time task parameters,
 * initialize liblitmus, run the optional MC^2 setup, and switch into
 * real-time mode. Any failure prints a diagnostic and exits.
 * Declares `wait`, `res`, and `rt_param` in the enclosing scope.
 * The core always comes from _rt_core, the only core variable this header
 * defines (the same one passed to be_migrate_to_domain()). */
#define SETUP_LITMUS \
	unsigned int wait = 0; \
	if (be_migrate_to_domain(_rt_core) < 0) { \
		perror("Unable to migrate to specified CPU"); \
		exit(1); \
	} \
	struct reservation_config res; \
	res.id = gettid(); \
	res.cpu = _rt_core; \
	res.priority = LITMUS_HIGHEST_PRIORITY; \
	/* we take over half the CPU time (these are ns) */ \
	res.polling_params.budget = ms2ns(3000); \
	res.polling_params.period = ms2ns(3000); \
	res.polling_params.offset = 0; \
	res.polling_params.relative_deadline = ms2ns(3000); \
	/* Not 100% sure that we should use periodic polling */ \
	if (reservation_create(PERIODIC_POLLING, &res) < 0) { \
		perror("Unable to create reservation"); \
		exit(1); \
	} \
	struct rt_task rt_param; \
	init_rt_task_param(&rt_param); \
	/* Supposedly the next two parameters are irrelevant when reservations are enabled, but I'm leaving them anyway... */ \
	rt_param.exec_cost = ms2ns(999); \
	rt_param.period = ms2ns(1000); \
	rt_param.priority = LITMUS_HIGHEST_PRIORITY; \
	rt_param.cls = RT_CLASS_HARD; \
	rt_param.release_policy = TASK_PERIODIC; \
	rt_param.budget_policy = NO_ENFORCEMENT; \
	rt_param.cpu = _rt_core; \
	if (set_rt_task_param(gettid(), &rt_param) < 0) { \
		perror("Unable to set real-time parameters"); \
		exit(1); \
	} \
	if (init_litmus() != 0) { \
		perror("init_litmus failed"); \
		exit(1); \
	} \
	MC2_SETUP \
	if (task_mode(LITMUS_RT_TASK) != 0) { \
		perror("Unable to become real-time task"); \
		exit(1); \
	} \
	if (wait && wait_for_ts_release() != 0) { \
		perror("Unable to wait for taskset release"); \
		exit(1); \
	}

/* Optional MC^2 step of SETUP_LITMUS; expands to nothing unless MC2 is set.
 * Relies on `rt_param` declared earlier in SETUP_LITMUS. */
#if MC2
#define MC2_SETUP \
	struct mc2_task mc2_param; \
	mc2_param.res_id = gettid(); \
	mc2_param.crit = CRIT_LEVEL_A; \
	if (set_mc2_task_param(gettid(), &mc2_param) < 0) { \
		perror("Unable to set MC^2 task params"); \
		exit(1); \
	} \
	set_page_color(rt_param.cpu);
#else
#define MC2_SETUP
#endif

/* LITMUS^RT teardown: drop back to background mode and destroy our
 * reservation. This is expanded outside main(), where the `rt_param` local
 * from SETUP_LITMUS is not in scope, so the core is taken from _rt_core
 * (the value rt_param.cpu was assigned from). */
#define CLEANUP_LITMUS \
	if (task_mode(BACKGROUND_TASK) != 0) { \
		perror("Unable to leave real-time mode"); \
		exit(1); \
	} \
	reservation_destroy(gettid(), _rt_core);
265
// FLUSH_CACHES evicts cached data before a job is timed. Both variants are
// wrapped in their own block so that their temporaries do not leak into the
// caller's scope and the macro may be expanded more than once per scope.
#if __arm__
// On ARM, manually flush the cache by walking a buffer four times the size of
// L2, touching one byte per cache line, four times over.
#define FLUSH_CACHES \
	{\
		volatile uint8_t buffer[L2_SIZE * 4]; \
		for (uint32_t j = 0; j < 4; j++) \
			for (uint32_t i = 0; i < L2_SIZE * 4; i += LINE_SIZE) \
				buffer[i]++; \
	}
#else
// On x86 call the wbinvd instruction (it's in a kernel module due to it being ring-0)
// Reading one byte from the module's /proc entry triggers the flush.
#define FLUSH_CACHES \
	{\
		FILE *fp = fopen("/proc/wbinvd", "r");\
		if (fp == NULL) {\
			perror("Cache flush module interface cannot be opened");\
			exit(1);\
		}\
		char dummy;\
		if (fread(&dummy, 1, 1, fp) == 0) {\
			perror("Unable to access cache flush module interface");\
			exit(1);\
		}\
		fclose(fp);\
	}
#endif
288
// Semaphore handshake between the two pair members (from Sims).
// FIRST_UNLOCK posts the semaphore that the *other* member waits on:
// member 1 posts the second semaphore, any other member posts the first.
#define FIRST_UNLOCK \
	{\
		sem_t *_rt_post_target = _rt_first_sem;\
		const char *_rt_post_error = "Unable to unlock first semaphore";\
		if (_rt_lock_id == 1) {\
			_rt_post_target = _rt_second_sem;\
			_rt_post_error = "Unable to unlock second semaphore";\
		}\
		if (sem_post(_rt_post_target) != 0) {\
			perror(_rt_post_error);\
			exit(1);\
		}\
	}

// FIRST_LOCK waits on this member's own semaphore: member 1 waits on the
// first semaphore, any other member waits on the second.
#define FIRST_LOCK \
	{\
		sem_t *_rt_wait_target = _rt_second_sem;\
		const char *_rt_wait_error = "Unable to wait on second semaphore";\
		if (_rt_lock_id == 1) {\
			_rt_wait_target = _rt_first_sem;\
			_rt_wait_error = "Unable to wait on first semaphore";\
		}\
		if (sem_wait(_rt_wait_target) != 0) {\
			perror(_rt_wait_error);\
			exit(1);\
		}\
	}
317
// Two-party spin barrier on the shared byte; this ensures a very low
// difference between pair member start times.
// Whoever finds the byte already set is the second to arrive and clears it;
// whoever manages to set it is the first to arrive and spins until it is
// cleared again.
#define BARRIER_SYNC \
	if (!__sync_bool_compare_and_swap(_rt_barrier, 0, 1)) {\
		__sync_bool_compare_and_swap(_rt_barrier, 1, 0);\
	}\
	else {\
		while (!__sync_bool_compare_and_swap(_rt_barrier, 0, 0))\
			;\
	}
326
327// Buffer timing result from a single job
328static void _rt_save_job_result() {
329 if (_rt_jobs_complete >= _rt_max_jobs) {
330 fprintf(stderr, "Max jobs setting too small! Trying to record job #%ld when we only have space for %ld jobs. Exiting...\n", _rt_jobs_complete, _rt_max_jobs);
331 exit(1);
332 }
333 if (_rt_jobs_complete > -1 && _rt_will_output) {
334#ifdef PAIRED
335 _rt_start_time[_rt_jobs_complete] = _rt_start.tv_sec;
336 _rt_start_time[_rt_jobs_complete] *= _BILLION;
337 _rt_start_time[_rt_jobs_complete] += _rt_start.tv_nsec;
338 _rt_end_time[_rt_jobs_complete] = _rt_end.tv_sec;
339 _rt_end_time[_rt_jobs_complete] *= _BILLION;
340 _rt_end_time[_rt_jobs_complete] += _rt_end.tv_nsec;
341#else
342 _rt_exec_time[_rt_jobs_complete] = _rt_end.tv_sec - _rt_start.tv_sec;
343 _rt_exec_time[_rt_jobs_complete] *= _BILLION;
344 _rt_exec_time[_rt_jobs_complete] += _rt_end.tv_nsec - _rt_start.tv_nsec;
345#endif /* PAIRED */
346#if MMDC_PROF
347 _rt_mmdc_read[_rt_jobs_complete] = mmdc_res.read_bytes;
348 _rt_mmdc_write[_rt_jobs_complete] = mmdc_res.write_bytes;
349#endif
350 }
351}
352
353// Save all buffered timing results to disk
354static void _rt_write_to_file() {
355 char fileName[_RT_FILENAME_LEN];
356 FILE *fp;
357 munlockall();
358 if (!_rt_will_output)
359 goto out;
360 strcpy(fileName, _rt_run_id);
361 strcat(fileName, ".txt");
362 fp = fopen(fileName, "a");
363 if (fp == NULL) {
364 perror("Unable to open output file");
365 exit(1);
366 }
367 // Baseline output uses a similar format with "none" for unused fields
368 for (int i = 0; i < _rt_jobs_complete; i++){
369 fprintf(fp, "%s %s %u %s %ld", _rt_our_prog_name, _rt_other_prog_name,
370 _rt_core, _rt_other_core, _rt_max_jobs);
371#ifdef PAIRED
372 // For unclear legacy reasons, paired tasks emit sec and ns separately
373 fprintf(fp, " %lld %lld %lld %lld",
374 _rt_start_time[i] / _BILLION, _rt_start_time[i] % _BILLION,
375 _rt_end_time[i] / _BILLION, _rt_end_time[i] % _BILLION);
376#else
377 fprintf(fp, " %.f", _rt_exec_time[i]);
378#endif /* PAIRED */
379 fprintf(fp, " %s %d %.f %.f\n", _rt_run_id, i,
380#if MMDC_PROF
381 _rt_mmdc_read[i], _rt_mmdc_write[i]);
382#else
383 0.0, 0.0);
384#endif /* MMDC_PROF */
385 }
386 fclose(fp);
387out:
388#if LITMUS
389 CLEANUP_LITMUS
390#endif /* LITMUS */
391#ifdef PAIRED
392 munmap(_rt_barrier, 1);
393 shm_unlink("/_libextra_barrier");
394 sem_unlink("/_libextra_first_sem");
395 sem_unlink("/_libextra_second_sem");
396 free(_rt_start_time);
397 free(_rt_end_time);
398#else
399 free(_rt_exec_time);
400#endif /* PAIRED */
401#if MMDC_PROF
402 free(_rt_mmdc_read);
403 free(_rt_mmdc_write);
404#endif /* MMDC_PROF */
405}
406
407// Start a job
408static void _rt_start_loop() {
409#if LITMUS
410 if (sleep_next_period() != 0) {
411 perror("Unable to sleep for next period");
412 }
413#else
414 sched_yield();
415#endif /* LITMUS */
416#ifdef PAIRED
417 FIRST_UNLOCK
418 FIRST_LOCK
419#endif /* PAIRED */
420 FLUSH_CACHES
421#ifdef PAIRED
422 BARRIER_SYNC
423#endif /* PAIRED */
424#if MMDC_PROF
425 /* This disables profiling, resets the counters, clears the overflow bit, and enables profiling */
426 start_mmdc_profiling(mmdc);
427#endif /* MMDC_PROF */
428 clock_gettime(CLOCK_MONOTONIC, &_rt_start);
429}
430
431// Complete a job
432static void _rt_stop_loop() {
433 clock_gettime(CLOCK_MONOTONIC, &_rt_end);
434#if MMDC_PROF
435 /* This freezes the profiling and makes results available */
436 pause_mmdc_profiling(mmdc);
437 get_mmdc_profiling_results(mmdc, &mmdc_res);
438#endif /* MMDC_PROF */
439 _rt_save_job_result();
440 _rt_jobs_complete++;
441}
442
/****** New API ******
 * Intended structure:
 *
 * |int main(int argc, char **argv) {
 * |  SET_UP
 * |  ...
 * |  for_each_job {
 * |    tacleInit();
 * |    tacleMain();
 * |  }
 * |  WRITE_TO_FILE
 * |}
 *
 * The main() function must call its parameters argc and argv for SET_UP to be
 * able to read them.
 * Only SET_UP necessarily has to be in main().
 *
 * We use some niche C features, here's a quick explanation:
 * 1. The conditional operator (?:) evaluates only one of its two result
 *    expressions. We use this to only execute _rt_start_loop() when the loop
 *    will actually run.
 * 2. The comma operator executes the first expression and then throws away the
 *    result. We use this to call our void function from inside the loop
 *    condition while still yielding a true value.
 */
#define for_each_job \
	for (; (_rt_jobs_complete < _rt_max_jobs) ? (_rt_start_loop(), 1) : 0; \
	     _rt_stop_loop())
470
/****** Legacy API ******
 * Intended structure:
 *
 * |int main(int argc, char **argv) {
 * |  SET_UP
 * |  for (jobsComplete=0; jobsComplete<maxJobs; jobsComplete++){
 * |    START_LOOP
 * |    tacleInit();
 * |    tacleMain();
 * |    STOP_LOOP
 * |  }
 * |  WRITE_TO_FILE
 * |  tacleReturn
 * |}
 *
 * The main() function must call its parameters argc and argv for SET_UP to be
 * able to read them.
 */
static int jobsComplete = 0;
#define START_LOOP _rt_start_loop();
#define STOP_LOOP _rt_stop_loop();
#define WRITE_TO_FILE _rt_write_to_file();
#define maxJobs _rt_max_jobs
// Has been part of STOP_LOOP for quite some time
// A #warning directive cannot be placed inside a macro body, so the
// deprecation notice is raised through _Pragma at every expansion instead.
#define SAVE_RESULTS \
	_Pragma("GCC warning \"The SAVE_RESULTS macro is deprecated and will soon be removed!\"")
// Unclear if SLEEP is used anywhere.
#define SLEEP \
	_Pragma("GCC warning \"The SLEEP macro is deprecated and may be removed!\"") \
	nanosleep((const struct timespec[]){{0, 1000000}}, NULL);