-rw-r--r--  bin/pm_common.c      652
-rw-r--r--  include/pm_common.h  146
2 files changed, 798 insertions, 0 deletions
diff --git a/bin/pm_common.c b/bin/pm_common.c
new file mode 100644
index 0000000..75118a4
--- /dev/null
+++ b/bin/pm_common.c
@@ -0,0 +1,652 @@
/*
 * pm_common.c
 *
 * Read / write data samples on file in binary format
 * Perform first elaboration on the (possibly big) samples set
 */
#include "pm_common.h"

#define BLOCK_MUL 500
#define SBLOCK_SIZE 1024

/* the number of hot reads that we can find is the same
 * as the number of iterations we performed in pm_task
 */
#define NUMHOTREADS REFTOTAL
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))

/*
 * Quick and dirty statistics to get a rough estimate of cache access times.
 * It does not tell the difference between "good" and "bad" overall
 * sampling points, so max values coming out of this are not accurate
 */
#define WANT_STATISTICS
#ifdef WANT_STATISTICS
#include <math.h>
#define CFREQ 2128.207
#endif

#ifdef DEBUG
#define dprintf(arg...) fprintf(stderr, arg)
#else
#define dprintf(arg...)
#endif

/* simple sequential write on disk.
 * (concurrent writes must be protected)
 *
 * saved_data_entry is ~ 20 B, so 100000 datapoints are ~ 2MB
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num)
{
	int fd;
	int i, j;

	/* buffer some data in memory before writing */
	struct saved_data_entry to_save[SBLOCK_SIZE];

	fd = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0660);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	for (i = 0; i < num / SBLOCK_SIZE; i++) {
		memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
		for (j = 0; j < SBLOCK_SIZE; j++) {
			to_save[j].access_type = samples[j].access_type;
			to_save[j].access_time = samples[j].access_time;
			to_save[j].cpu = samples[j].cpu;
			to_save[j].preemption_length =
				samples[j].preemption_length;
		}

		/* advance to the next block of samples */
		samples += SBLOCK_SIZE;

		if (write(fd, to_save, sizeof(struct saved_data_entry) * SBLOCK_SIZE) == -1) {
			close(fd);
			perror("write");
			return -1;
		}
	}

	/* write the remaining (num % SBLOCK_SIZE) entries */
	memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
	for (j = 0; j < num % SBLOCK_SIZE; j++) {
		to_save[j].access_type = samples[j].access_type;
		to_save[j].access_time = samples[j].access_time;
		to_save[j].cpu = samples[j].cpu;
		to_save[j].preemption_length =
			samples[j].preemption_length;
	}

	if (write(fd, to_save, sizeof(struct saved_data_entry) * j) == -1) {
		close(fd);
		perror("write");
		return -1;
	}

	dprintf("Written %d entries\n", i * SBLOCK_SIZE + j);

	close(fd);
	return 0;
}

/*
 * Presumably, all data will be written on little endian machines.
 * I assume the binary format is little endian
 *
 * return -1 on error
 * return number of samples on success
 */
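/* Note: samples are written and read back as raw struct memory, so besides
 * endianness this also assumes the reader was built with the same struct
 * layout / padding as the writer; no byte swapping or format conversion
 * is performed here.
 */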
int read_sdata_entry(const char *filename, struct saved_data_entry **samples)
{
	int fd;
	int i, j;

	int num_samples, file_size;
	struct saved_data_entry block_read[BLOCK_MUL];

	int bytes_read;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* Compute file size */
	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	/* Return to start position */
	if (lseek(fd, 0, SEEK_SET) == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	num_samples = file_size / sizeof(struct saved_data_entry);
	dprintf("N entries: %d\n", num_samples);

	/* Allocate memory for data_entry samples */
	*samples = (struct saved_data_entry *) malloc(num_samples *
			sizeof(struct saved_data_entry));
	if (*samples == NULL) {
		close(fd);
		perror("malloc");
		return -1;
	}

	/* Read the whole file */
	j = 0;
	do {
		/* Read file (in BLOCK_MUL * sizeof(saved_data_entry) block size) */
		bytes_read = read(fd, block_read, sizeof(struct saved_data_entry) * BLOCK_MUL);
		if (bytes_read == -1) {
			perror("read");
			close(fd);
			free(*samples);
			return -1;
		}

		for (i = 0; i < (bytes_read / sizeof(struct saved_data_entry)); i++, j++)
			(*samples)[j] = block_read[i];

	} while (bytes_read > 0);

	close(fd);

#ifdef VERBOSE_DEBUG
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			(*samples)[i].access_type,
			(*samples)[i].access_time, (*samples)[i].cpu,
			(*samples)[i].preemption_length);
#endif
	return num_samples;
}

#ifdef WANT_STATISTICS
/*
 * print min, max, avg, stddev for the vector
 * samples is the size of the population
 * cpufreq is in MHz, so dividing tick values by it yields microseconds
 */
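/* The loop below is a running (Welford-style) computation of mean and
 * variance, avoiding a second pass over a possibly large vector:
 *   m_k = m_{k-1} + (x_k - m_{k-1}) / k
 *   q_k = q_{k-1} + ((k-1) / k) * (x_k - m_{k-1})^2
 * and the sample stddev is sqrt(q_n / (n - 1)). Note that the loop stops
 * one element short of the end of the vector, which is why the divisor
 * used below is (samples - 2) rather than (samples - 1).
 */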
void print_rough_stats(unsigned long long *vector, int samples, double cpufreq,
		int wss, int tss)
{
	unsigned long long min, max;
	long double mi, qi, num_diff;
	int i;

	/* manage first value */
	mi = vector[0];
	qi = 0;

	min = vector[0];
	max = vector[0];

	for (i = 1; i < (samples - 1); i++) {

		if (vector[i] < min)
			min = vector[i];
		if (vector[i] > max)
			max = vector[i];

		num_diff = (long double)vector[i] - mi;

		mi += num_diff / ((long double)(i + 1));
		qi += (i * (num_diff * num_diff)) / ((long double)(i + 1));
	}

	/* unbiased stddev should be computed on (samples - 2) */
	/*
	fprintf(stderr, "CPUFREQ = %f\nValues in tick\n", cpufreq);
	fprintf(stderr, "max = %llu\nmin = %llu\nmean = %Lf\nstddev = %Lf\n",
		max, min, mi, sqrtl(qi / (samples - 2)));
	*/
	fprintf(stderr, "# wss, tss, max, min, avg, stddev\n");
	fprintf(stderr, "%d, %d, %.5f, %.5f, %.5Lf, %.5Lf\n",
		wss, tss,
		max / cpufreq, min / cpufreq, mi / cpufreq,
		sqrtl(qi / (samples - 2)) / cpufreq);
}
#endif

/*
 * get_valid_ovd(): get valid overheads from a trace file
 *
 * input:
 * @filename: input trace file name
 *
 * output:
 * @full_costs: array of all overheads and preemption lengths associated
 *              with valid measures
 *
 * full_costs is allocated by this function (one entry per input sample,
 * an upper bound on the number of valid measures); the caller must
 * free() it when done
 *
 * @return: number of valid measures read (the implicit "true" length of
 *          the output array); < 0 on error
 */
int get_valid_ovd(const char *filename, struct full_ovd_plen **full_costs,
		int wss, int tss)
{
	struct saved_data_entry *samples;
	/* total number of samples */
	int num_samples;
	/* number of valid samples */
	int scount = 0;

	int i;

	/* how many valid hot reads so far? */
	int valid_hot_reads = 0;
	/* how many consecutive hot reads? */
	int total_hot_reads = 0;
	/* do we have a valid hot cost? */
	int valid_hot_cost = 0;
	/* are the hot reads valid so far? */
	int no_invalid_reads = 1;
	/* what is the last cpu seen so far? */
	unsigned int l_cpu = 0;

	unsigned long long hot_cost;
#ifdef WANT_STATISTICS
	unsigned long long *valid_c_samples;
	unsigned long long *valid_h_samples;
	unsigned long long *valid_p_samples;
	int c_count;
	int h_count;
	int p_count;
#endif

	if ((num_samples = read_sdata_entry(filename, &samples)) < 0) {
		fprintf(stderr, "Cannot read %s\n", filename);
		return -1;
	}

	/* alloc an upper bound of the number of valid samples we can have */
	*full_costs = (struct full_ovd_plen *) malloc(num_samples *
			sizeof(struct full_ovd_plen));
	if (*full_costs == NULL) {
		fprintf(stderr, "Cannot allocate overhead array\n");
		free(samples);
		return -1;
	}
	memset(*full_costs, 0, num_samples * sizeof(struct full_ovd_plen));

#ifdef WANT_STATISTICS
	valid_c_samples = (unsigned long long *) malloc(num_samples *
			sizeof(unsigned long long));
	if (valid_c_samples == NULL) {
		fprintf(stderr, "Cannot allocate statistics array\n");
		free(*full_costs);
		free(samples);
		return -1;
	}
	valid_h_samples = (unsigned long long *) malloc(num_samples *
			sizeof(unsigned long long));
	if (valid_h_samples == NULL) {
		fprintf(stderr, "Cannot allocate statistics array\n");
		free(valid_c_samples);
		free(*full_costs);
		free(samples);
		return -1;
	}
	valid_p_samples = (unsigned long long *) malloc(num_samples *
			sizeof(unsigned long long));
	if (valid_p_samples == NULL) {
		fprintf(stderr, "Cannot allocate statistics array\n");
		free(valid_h_samples);
		free(valid_c_samples);
		free(*full_costs);
		free(samples);
		return -1;
	}
	memset(valid_c_samples, 0, num_samples * sizeof(unsigned long long));
	memset(valid_h_samples, 0, num_samples * sizeof(unsigned long long));
	memset(valid_p_samples, 0, num_samples * sizeof(unsigned long long));

	c_count = 0;
	h_count = 0;
	p_count = 0;
#endif
#ifdef VERBOSE_DEBUG
	fprintf(stderr, "Start collected overhead\n");
	/* write this on stderr so we can redirect it to a different stream */
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			samples[i].access_type,
			samples[i].access_time, samples[i].cpu,
			samples[i].preemption_length);
	fprintf(stderr, "End collected overhead\n");
#endif
	hot_cost = samples[0].access_time;

	/* get valid overhead reads */
	for (i = 0; i < num_samples; i++) {

		if (samples[i].access_type == 'H' ||
		    samples[i].access_type == 'h') {
			/* NUMHOTREADS consecutive 'H' hot reads should
			 * (hopefully) appear. Take the minimum
			 * of all valid reads up to when the first
			 * invalid 'h' read appears.
			 */
			total_hot_reads++;
			if (no_invalid_reads && samples[i].access_type == 'H') {

				valid_hot_reads++;
				if (valid_hot_reads == 1)
					hot_cost = samples[i].access_time;
				else
					hot_cost = min(hot_cost, samples[i].access_time);

			} else {
				/* an invalid hot read was found */
				no_invalid_reads = 0;
			}

			if (total_hot_reads == NUMHOTREADS) {
				/* check if we have a valid hot read value */
				if (valid_hot_reads > 0)
					valid_hot_cost = 1;
				else
					valid_hot_cost = 0;

				/* reset flags */
				valid_hot_reads = 0;
				total_hot_reads = 0;
				no_invalid_reads = 1;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;

		} else {
			if (samples[i].access_type == 'P' ||
			    samples[i].access_type == 'p') {

				/* this may be a preemption or a migration
				 * but we do not care now: just report it
				 * if it happened after a valid hot read
				 * and the preemption measure is valid
				 */
				if (valid_hot_cost && samples[i].access_type == 'P') {

					(*full_costs)[scount].curr_cpu = samples[i].cpu;
					(*full_costs)[scount].last_cpu = l_cpu;
					(*full_costs)[scount].ovd =
						(long long)samples[i].access_time -
						(long long)hot_cost;

					(*full_costs)[scount].plen = (long long)
						samples[i].preemption_length;

					dprintf("%u %u %lld %lld\n", (*full_costs)[scount].curr_cpu,
						(*full_costs)[scount].last_cpu,
						(*full_costs)[scount].ovd, (*full_costs)[scount].plen);

					scount++;
				}

				/* update last seen cpu */
				l_cpu = samples[i].cpu;
			}
		}
#ifdef WANT_STATISTICS
		if (samples[i].access_type == 'C')
			valid_c_samples[c_count++] = samples[i].access_time;
		else if (samples[i].access_type == 'H')
			valid_h_samples[h_count++] = samples[i].access_time;
		else if (samples[i].access_type == 'P')
			valid_p_samples[p_count++] = samples[i].access_time;
#endif
	}

	dprintf("End of valid entries\n");
#ifdef WANT_STATISTICS
	fprintf(stderr, "# Cold cache\n");
	print_rough_stats(valid_c_samples, c_count, CFREQ, wss, tss);
	fprintf(stderr, "# Hot cache\n");
	print_rough_stats(valid_h_samples, h_count, CFREQ, wss, tss);
	fprintf(stderr, "# After preemption\n");
	print_rough_stats(valid_p_samples, p_count, CFREQ, wss, tss);
	fprintf(stderr, "## Nsamples(c,h,p): %d, %d, %d\n",
		c_count, h_count, p_count);

	free(valid_p_samples);
	free(valid_h_samples);
	free(valid_c_samples);
#endif

	free(samples);
	return scount;
}
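
/*
 * A minimal caller sketch (not part of the original code base: the trace
 * file name, the example topology, and the PM_COMMON_USAGE_EXAMPLE /
 * example_main names are made up for illustration). It shows the intended
 * pipeline: get_valid_ovd() to extract valid measures, then
 * get_ovd_plen_umaxeon() (below) to split them by core configuration.
 */
#ifdef PM_COMMON_USAGE_EXAMPLE
int example_main(void)
{
	struct full_ovd_plen *costs;
	struct ovd_plen *preempt, *samel2, *samechip, *offchip;
	int n, pcount, l2count, chipcount, offcount;

	n = get_valid_ovd("pm_trace.bin", &costs, WSS, 2);
	if (n < 0)
		return 1;

	/* each class can hold at most all n measures */
	preempt = (struct ovd_plen *) malloc(n * sizeof(struct ovd_plen));
	samel2 = (struct ovd_plen *) malloc(n * sizeof(struct ovd_plen));
	samechip = (struct ovd_plen *) malloc(n * sizeof(struct ovd_plen));
	offchip = (struct ovd_plen *) malloc(n * sizeof(struct ovd_plen));
	if (!preempt || !samel2 || !samechip || !offchip)
		return 1;

	/* hypothetical topology: 4 physical chips, 2 cores per L2, with L3 */
	get_ovd_plen_umaxeon(costs, n, 2, 4,
			preempt, &pcount, samel2, &l2count,
			samechip, &chipcount, offchip, &offcount);

	printf("preempt %d, same-L2 %d, same-chip %d, off-chip %d\n",
		pcount, l2count, chipcount, offcount);

	free(offchip);
	free(samechip);
	free(samel2);
	free(preempt);
	free(costs);
	return 0;
}
#endif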

/*
 * TODO: we are not using this function anymore, as the cpu topology
 * of our systems (xeon) doesn't match the cpu number assignment this
 * function needs to work correctly.
 * Should be fixed at some point (also because i7 uses a different
 * cpu assignment). --- See below for the currently used function.
 *
 * get_ovd_plen(): get overheads and preemption/migration lengths for
 * different core configurations
 *
 * For most architectures we can have at most 3 cache levels on the same chip
 * and then off chip migrations. In the worst case we need to measure:
 * [1] same core preemption, [2] same L2 migration,
 * [3] same L3 (different L2, same chip) migration, [4] off chip migration.
 *
 * Linux numbers _physical_ CPUs in a "linear" fashion, assigning
 * consecutive sequence numbers to the cores of one physical cpu and then
 * jumping to the next physical cpu. Look in sysfs for more details on cpu
 * topology. This doesn't seem to apply to NUMA machines (e.g., Opteron 8212,
 * Pound -> Nehalem i7), so the following function is probably working there,
 * but we need to check the topology first...
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 *               (and in output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @cores_per_chip: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * if samel2 is NULL, then L3 is not present and samel2 is equivalent to
 * samechip. cores_per_l2 should be equal to cores_per_chip, but is not used.
 */
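/* Illustration of the "linear" numbering this function assumes (values
 * made up): with cores_per_l2 = 2 and cores_per_chip = 4, cpus 0-3 sit on
 * chip 0 and cpus 4-7 on chip 1; cpus {0,1}, {2,3}, ... each share an L2.
 * Hence cpu / cores_per_l2 identifies the L2 domain and
 * cpu / cores_per_chip identifies the chip.
 */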
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	int i;
	unsigned int curr_cpu;
	unsigned int last_cpu;

	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);
		curr_cpu = full_costs[i].curr_cpu;
		last_cpu = full_costs[i].last_cpu;

		if (curr_cpu == last_cpu) {
			dprintf("preempt\n");
			/* preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;

			continue;
		}

		if (samel2) {
			dprintf("l2\n");

			if ((curr_cpu / cores_per_l2) == (last_cpu / cores_per_l2)) {
				dprintf("same L2\n");
				/* same L2 migration */
				samel2[*l2count].ovd = full_costs[i].ovd;
				samel2[*l2count].plen = full_costs[i].plen;
				(*l2count)++;

				continue;
			}

			if (((curr_cpu / cores_per_l2) != (last_cpu / cores_per_l2)) &&
			    ((curr_cpu / cores_per_chip) == (last_cpu / cores_per_chip))) {
				dprintf("same L3\n");
				/* same L3 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		} else {
			dprintf("same chip\n");
			/* samel2 == NULL */
			/* check same chip migration */
			if ((curr_cpu / cores_per_chip) == (last_cpu / cores_per_chip)) {

				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		}
		dprintf("offchip\n");
		/* if we are here it should have been an offchip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}

/*
 * get_ovd_plen_umaxeon(): get overheads and preemption/migration lengths
 *                         for different core configurations on UMA xeon
 *
 * See the comments above. This should probably work on most xeons (at least
 * on jupiter and ludwig)
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 *               (and in output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @num_phys_cpu: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * FIXME: using samel2 == NULL to say that L3 is not there is tricky...
 * if samel2 is NULL, then L3 is not present and samel2 is equivalent to
 * samechip. cores_per_l2 should be equal to cores_per_chip, but is not used.
 */
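/* Illustration of the round-robin numbering this function assumes (values
 * made up): with num_phys_cpu = 4, cpus 0,4,8,... sit on chip 0 and cpus
 * 1,5,9,... on chip 1, so cpu % num_phys_cpu identifies the chip, while
 * cpu / num_phys_cpu is the core index within the chip, which divided by
 * cores_per_l2 gives the shared-L2 domain.
 */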
void get_ovd_plen_umaxeon(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int num_phys_cpu,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	int i;
	unsigned int curr_cpu;
	unsigned int last_cpu;

	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {

		dprintf("i = %d\n", i);
		curr_cpu = full_costs[i].curr_cpu;
		last_cpu = full_costs[i].last_cpu;

		if (curr_cpu == last_cpu) {
			dprintf("preempt\n");
			/* preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;

			continue;
		}

		if ((curr_cpu % num_phys_cpu) == (last_cpu % num_phys_cpu)) {
			/* ok, both cpus are on the same chip; which caches do they share? */
			if (samel2) {
				/* we have both L3 and L2.
				 * We already know we are sharing L3 */
				if (((curr_cpu / num_phys_cpu) / cores_per_l2) ==
				    ((last_cpu / num_phys_cpu) / cores_per_l2)) {
					/* they also share L2 */
					dprintf("same L2\n");
					samel2[*l2count].ovd = full_costs[i].ovd;
					samel2[*l2count].plen = full_costs[i].plen;
					(*l2count)++;

					continue;
				} else {
					/* this is an L3 migration */
					dprintf("same L3\n");
					samechip[*chipcount].ovd = full_costs[i].ovd;
					samechip[*chipcount].plen = full_costs[i].plen;
					(*chipcount)++;

					continue;
				}
			} else {
				/* ok, just L2 on this machine; this is an L2 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		}

		dprintf("offchip\n");
		/* if we are here it should have been an offchip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}

diff --git a/include/pm_common.h b/include/pm_common.h
new file mode 100644
index 0000000..f1d7be6
--- /dev/null
+++ b/include/pm_common.h
@@ -0,0 +1,146 @@
/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* WSS, CACHESIZE, DATAPOINTS may be given as command-line defines
 * when recompiling this test for different WSS, CACHESIZE and (?) datapoints.
 * ATM only WSS can be passed through the scons building mechanism
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE, recompiling this program at
 * each round (easier for memory management), but running all tests
 * without human intervention
 */
/*
 * default working set size, in KB
 * non-default WSS are taken from the test script (-DWSS=...)
 */
#ifndef WSS
#define WSS 3072
#endif
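/* For instance (illustrative invocation only; the actual build goes
 * through scons, and the source file list here is made up):
 *
 *   gcc -O2 -DWSS=1024 -Iinclude bin/pm_common.c bin/pm_task.c -lm
 *
 * would build the test with a 1MB working set instead of the default.
 */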
/* Cache size:
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 12MB
 * Pound: L2: 256KB, L3 8MB
 */
#define CACHESIZE (12 * 1024)

/* number of measurements that can be stored per single pm_task */
#define DATAPOINTS 100000

/* The following macros (hopefully) don't need any modification */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT 64
/* ints per WSS */
#define INTS_PER_WSS ((WSS * 1024) / sizeof(int))
/* reads vs. writes ratio */
#define READRATIO 75
/* random seed */
#define SEEDVAL 12345
/* number of "working sets" to cycle through */
#define NUMWS (((CACHESIZE * 2) / WSS) + 2)
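/* e.g., with the defaults above (CACHESIZE = 12MB, WSS = 3MB) this gives
 * NUMWS = ((12288 * 2) / 3072) + 2 = 10 working sets, and, assuming
 * 4-byte ints, INTS_PER_WSS = (3072 * 1024) / 4 = 786432
 */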
/* runtime in milliseconds -- 60s */
#define SIMRUNTIME 60000
/* times to read warm memory to get accurate data */
/* preliminary experiments on Ludwig show that we can safely set
 * this to just 2 iterations (first and second 'H' accesses are ~ equal)
 * (it was 3)
 */
#define REFTOTAL 2

#define NS_PER_MS 1000000

struct data_entry {
	unsigned long long timestamp;

	/* cC cold cache access
	 * hH hot cache access
	 * pP preemption / migration
	 */
	char access_type;
	unsigned long long access_time;

	unsigned int cpu;
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};

/* serializable data entry */
struct saved_data_entry {
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};
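/* Rough sizing: the payload above is ~ 20 B per entry; on a typical
 * 64-bit ABI, alignment padding rounds sizeof(struct saved_data_entry)
 * up (e.g., to 32 B), and that padded size is what lands on disk.
 */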

/* long long is a looot of time and should be enough for our needs.
 * However, we keep the saved data as unsigned long long and leave
 * dealing with the conversion to the analysis
 */
struct full_ovd_plen {
	/* "current" cpu */
	unsigned int curr_cpu;
	/* last "seen" cpu (curr != last --> migration) */
	unsigned int last_cpu;
	/* overhead */
	long long ovd;
	/* preemption length */
	long long plen;
};

struct ovd_plen {
	long long ovd;
	long long plen;
};

/* write data_entry -> saved_data_entry on disk */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);
/* read saved_data_entry from disk */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);

/* get valid overheads from trace file */
int get_valid_ovd(const char *filename, struct full_ovd_plen **full_costs,
		int wss, int tss);

/* get ovd and pm lengths for different core configurations (on UMA xeon) */
/* Watch out for different topologies:
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 */
void get_ovd_plen_umaxeon(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int num_phys_cpu,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);

/* get ovd and pm lengths for different core configurations */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);
#endif