author	Andrea Bastoni <bastoni@cs.unc.edu>	2010-03-02 16:29:39 -0500
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-03-02 16:29:39 -0500
commit	2fbd2511d983ca384d0aa9127c3953f6f7ba023f (patch)
tree	7cc8d7f8721555926b4169e0d7a9d74751b9c3b1
parent	36ec0f4d0e32a4302e44a4bc25af73cb1026229f (diff)
Add shared functionality for all components of the preemption/migration overhead measurement.

- Add data structures
- Add functions to read/write binary files
- Add initial data parsing
- Add an overhead "extractor" for multiple-core configurations
-rw-r--r--	include/pm_common.h	123
-rw-r--r--	pm_test/pm_common.c	405
2 files changed, 528 insertions, 0 deletions
diff --git a/include/pm_common.h b/include/pm_common.h
new file mode 100644
index 0000000..974dc9a
--- /dev/null
+++ b/include/pm_common.h
@@ -0,0 +1,123 @@
/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* WSS, CACHESIZE, and DATAPOINTS may be given as command-line defines
 * when recompiling this test for a different WSS, CACHESIZE, and number
 * of datapoints.
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE values, recompiling this program at
 * each round (easier for memory management), but running all tests
 * without human intervention.
 */
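/* For example, such a script might recompile with (hypothetical
 * invocation; no build files are part of this commit):
 *
 *	cc -DWSS=512 -DCACHESIZE=3072 -DDATAPOINTS=50000 ...
 *
 * to test a 512KB working set against a 3MB cache. The #ifndef guards
 * below exist so that such -D overrides do not clash with the defaults.
 */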
/* working set size, in KB */
#ifndef WSS
#define WSS 1024
#endif
/* Cache size:
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 16MB
 * Pound: L2: 256KB, L3 8MB
 */
#ifndef CACHESIZE
#define CACHESIZE (6 * 1024)
#endif

/* number of measurements that can be stored */
#ifndef DATAPOINTS
#define DATAPOINTS 100000
#endif

/* The following macros shouldn't (hopefully) need any modification */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT 64
/* ints per WSS */
#define INTS_PER_WSS ((WSS*1024)/(sizeof(int)))
/* reads vs. writes ratio */
#define READRATIO 75
/* random seed */
#define SEEDVAL 12345
/* number of "working sets" to cycle through */
#define NUMWS (((CACHESIZE*2)/WSS)+2)
/* runtime in milliseconds -- 15s */
#define SIMRUNTIME 15000
/* times to read warm memory to get accurate data */
#define REFTOTAL 3

#define NS_PER_MS 1000000

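/* Worked example with the defaults above (assuming sizeof(int) == 4):
 * WSS = 1024KB gives INTS_PER_WSS = 262144, and CACHESIZE = 6MB gives
 * NUMWS = ((6144*2)/1024) + 2 = 14 working sets to cycle through.
 */
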
struct data_entry {
	unsigned long long timestamp;

	/* cC cold cache access
	 * hH hot cache access
	 * pP preemption / migration
	 */
	char access_type;
	unsigned long long access_time;

	unsigned int cpu;
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};

/* serializable data entry */
struct saved_data_entry {
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};

/* A long long is a looot of time and should be enough for our needs.
 * However, we keep the saved data as unsigned long long and leave
 * the conversion to the analysis.
 */
struct full_ovd_plen {
	/* "current" cpu */
	unsigned int curr_cpu;
	/* last "seen" cpu (curr != last --> migration) */
	unsigned int last_cpu;
	/* overhead */
	long long ovd;
	/* preemption length */
	long long plen;
};

struct ovd_plen {
	long long ovd;
	long long plen;
};

/* write data_entry -> saved_data_entry on disk */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);
/* read saved_data_entry from disk */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);

/* get valid overheads from a trace file */
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs);
/* get overhead and p/m length for different core configurations */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);
#endif
diff --git a/pm_test/pm_common.c b/pm_test/pm_common.c
new file mode 100644
index 0000000..49ad2e7
--- /dev/null
+++ b/pm_test/pm_common.c
@@ -0,0 +1,405 @@
/*
 * pm_common.c
 *
 * Read / write data samples to a file in binary format.
 * Perform a first processing pass on the (possibly big) sample set.
 */
#include "pm_common.h"

#define BLOCK_MUL 500
#define SBLOCK_SIZE 1024

#define NUMHOTREADS 3
#define min(a,b) ((a)<(b)?(a):(b))

#ifdef DEBUG
#define dprintf(arg...) fprintf(stderr,arg)
#else
#define dprintf(arg...)
#endif

/* Simple sequential write to disk.
 * (Concurrent writes must be protected.)
 *
 * A saved_data_entry is ~21 B packed (typically 32 B with alignment
 * padding), so 100000 datapoints are on the order of 2-3 MB.
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num)
{
	int fd;
	int i, j;

	/* buffer some data in memory before writing */
	struct saved_data_entry to_save[SBLOCK_SIZE];

	fd = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0660);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* full blocks of SBLOCK_SIZE entries */
	for (i = 0; i < num / SBLOCK_SIZE; i++) {
		memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
		for (j = 0; j < SBLOCK_SIZE; j++) {
			to_save[j].access_type = samples[j].access_type;
			to_save[j].access_time = samples[j].access_time;
			to_save[j].cpu = samples[j].cpu;
			to_save[j].preemption_length =
				samples[j].preemption_length;
		}

		/* advance to the next block (j == SBLOCK_SIZE here) */
		samples = &samples[j];

		if (write(fd, to_save, sizeof(struct saved_data_entry) * SBLOCK_SIZE) == -1) {
			close(fd);
			perror("write");
			return -1;
		}
	}

	/* remainder (num % SBLOCK_SIZE entries) */
	memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
	for (j = 0; j < num % SBLOCK_SIZE; j++) {
		to_save[j].access_type = samples[j].access_type;
		to_save[j].access_time = samples[j].access_time;
		to_save[j].cpu = samples[j].cpu;
		to_save[j].preemption_length = samples[j].preemption_length;
	}

	if (write(fd, to_save, sizeof(struct saved_data_entry) * j) == -1) {
		close(fd);
		perror("write");
		return -1;
	}

	dprintf("Written %d entries\n", i*SBLOCK_SIZE + j);

	close(fd);
	return 0;
}
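
/*
 * Usage sketch (hypothetical caller; run_benchmark() is an assumed
 * name, not part of this commit): a benchmark fills a data_entry
 * array and flushes it once at the end of the run, e.g.
 *
 *	static struct data_entry trace[DATAPOINTS];
 *	int n = run_benchmark(trace);
 *	if (serialize_data_entry("wss1024.bin", trace, n) == -1)
 *		exit(1);
 */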

/*
 * Presumably, all data will be written on little-endian machines,
 * so the binary format is assumed to be little endian.
 *
 * Returns -1 on error,
 * the number of samples on success.
 */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples)
{
	int fd;
	int i, j;

	int num_samples, file_size;
	struct saved_data_entry block_read[BLOCK_MUL];

	int bytes_read;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* compute the file size */
	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	/* return to the start position */
	if (lseek(fd, 0, SEEK_SET) == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	num_samples = file_size / sizeof(struct saved_data_entry);
	dprintf("N entries: %d\n", num_samples);

	/* allocate memory for the saved_data_entry samples */
	*samples = malloc(num_samples * sizeof(struct saved_data_entry));
	if (*samples == NULL) {
		close(fd);
		perror("malloc");
		return -1;
	}

	/* read the whole file, in blocks of
	 * BLOCK_MUL * sizeof(struct saved_data_entry) bytes */
	j = 0;
	do {
		bytes_read = read(fd, block_read,
				  sizeof(struct saved_data_entry) * BLOCK_MUL);
		if (bytes_read == -1) {
			perror("read");
			close(fd);
			free(*samples);
			return -1;
		}

		for (i = 0; i < (bytes_read / sizeof(struct saved_data_entry)); i++, j++)
			(*samples)[j] = block_read[i];

	} while (bytes_read > 0);

	close(fd);

#ifdef DEBUG
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			(*samples)[i].access_type,
			(*samples)[i].access_time, (*samples)[i].cpu,
			(*samples)[i].preemption_length);
#endif
	return num_samples;
}

/*
 * get_valid_ovd(): get valid overheads from a trace file
 *
 * input:
 * @filename: input trace file name
 *
 * output:
 * @full_costs: array of all overheads and preemption lengths associated
 * with valid measures
 *
 * full_costs MUST be initialized before entering this function and MUST
 * be at least DATAPOINTS long.
 *
 * @return: number of valid measures read (the implicit "true" length of
 * the output array); < 0 on error.
 */
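/*
 * Illustrative trace windows, per the checks below (NUMHOTREADS == 3):
 *   H H H P -> valid: ovd = the 'P' access_time minus the minimum of
 *              the three 'H' reads
 *   H h H P -> still valid: only the 'H' reads before the first invalid
 *              'h' (here, just the first one) contribute to the minimum
 *   h H H P -> invalid window: no valid hot cost, so the 'P' is dropped
 * A lowercase 'p' sample is dropped even after a valid window.
 */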
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs)
{
	struct saved_data_entry *samples;
	/* total number of samples */
	int num_samples;
	/* number of valid samples */
	int scount = 0;

	int i;

	/* do we have a valid hot read? */
	int valid_hot_reads = 0;
	/* how many consecutive hot reads? */
	int total_hot_reads = 0;
	/* do we have a valid hot cost? */
	int valid_hot_cost = 0;
	/* are the hot reads valid so far? */
	int no_invalid_reads = 1;
	/* what is the last cpu seen so far? */
	unsigned int l_cpu = 0;

	unsigned long long hot_cost;

	/* if the output array isn't long enough, segfault early */
	memset(full_costs, 0, DATAPOINTS * sizeof(struct full_ovd_plen));

	if ((num_samples = read_sdata_entry(filename, &samples)) < 0) {
		fprintf(stderr, "Cannot read %s\n", filename);
		return -1;
	}

#ifdef DEBUG
	fprintf(stderr, "Start valid overhead\n");
	/* write this on stderr so we can redirect it to a different stream */
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			samples[i].access_type,
			samples[i].access_time, samples[i].cpu,
			samples[i].preemption_length);
	fprintf(stderr, "End valid overhead\n");
#endif
	hot_cost = samples[0].access_time;
	/* get valid overhead reads */
	for (i = 0; i < num_samples; i++) {

		if (samples[i].access_type == 'H' ||
		    samples[i].access_type == 'h') {
			/* NUMHOTREADS consecutive 'H' hot reads should
			 * (hopefully) appear. Take the minimum
			 * of all valid reads up to where the first
			 * invalid 'h' read appears.
			 */
			total_hot_reads++;
			if (no_invalid_reads && samples[i].access_type == 'H') {

				valid_hot_reads++;
				if (valid_hot_reads == 1) {
					hot_cost = samples[i].access_time;
					fprintf(stderr, "h1 = %llu\n", hot_cost);
				} else {
					hot_cost = min(hot_cost, samples[i].access_time);
					fprintf(stderr, "hm = %llu\n", hot_cost);
				}

			} else {
				/* an invalid 'h' read: ignore the hot
				 * reads in the rest of this window */
				no_invalid_reads = 0;
			}

			if (total_hot_reads == NUMHOTREADS) {
				/* check if we have a valid hot-read value */
				if (valid_hot_reads > 0)
					valid_hot_cost = 1;
				else
					valid_hot_cost = 0;

				/* reset flags */
				valid_hot_reads = 0;
				total_hot_reads = 0;
				no_invalid_reads = 1;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;

		} else if (samples[i].access_type == 'P' ||
			   samples[i].access_type == 'p') {

			/* This may be a preemption or a migration,
			 * but we do not care now: just report it
			 * if it happened after a valid hot read
			 * and the preemption measure is valid.
			 */
			if (valid_hot_cost && samples[i].access_type == 'P') {

				full_costs[scount].curr_cpu = samples[i].cpu;
				full_costs[scount].last_cpu = l_cpu;
				full_costs[scount].ovd = (long long)
					samples[i].access_time - hot_cost;

				fprintf(stderr, "hs = %llu\n", hot_cost);
				fprintf(stderr, "s1 = %llu\n", samples[i].access_time);
				fprintf(stderr, "o1 = %lld\n", full_costs[scount].ovd);

				full_costs[scount].plen = (long long)
					samples[i].preemption_length;

				dprintf("%u %u %lld %lld\n", full_costs[scount].curr_cpu,
					full_costs[scount].last_cpu,
					full_costs[scount].ovd, full_costs[scount].plen);

				scount++;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;
		}
	}

	dprintf("End of valid entries\n");

	free(samples);
	return scount;
}

/*
 * get_ovd_plen(): get overheads and preemption/migration lengths for
 * different core configurations
 *
 * For most architectures we can have at most 3 cache levels on the same
 * chip, plus off-chip migrations. In the worst case we need to measure:
 * [1] same-core preemption, [2] same-L2 migration,
 * [3] same-L3 (different L2, same chip) migration, [4] off-chip migration.
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 * (and in the output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @cores_per_chip: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * If samel2 is NULL, then there is no L3 and "same L2" is equivalent to
 * "same chip". cores_per_l2 should then equal cores_per_chip, but is
 * not used.
 */
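/*
 * Worked example (hypothetical topology with 2 cores per L2 and 4 cores
 * per chip): a migration from cpu 1 to cpu 3 gives 1/2 != 3/2 (different
 * L2) but 1/4 == 3/4 (same chip), so the sample is counted in @samechip;
 * cpu 1 to cpu 5 gives 1/4 != 5/4, so it lands in @offchip.
 */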
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	int i;
	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);

		if (full_costs[i].curr_cpu == full_costs[i].last_cpu) {
			dprintf("preempt\n");
			/* same core: preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;

			continue;
		}

		if (samel2) {
			dprintf("l2\n");

			if ((full_costs[i].curr_cpu / cores_per_l2) ==
			    (full_costs[i].last_cpu / cores_per_l2)) {
				dprintf("same L2\n");
				/* same-L2 migration */
				samel2[*l2count].ovd = full_costs[i].ovd;
				samel2[*l2count].plen = full_costs[i].plen;
				(*l2count)++;

				continue;
			}

			/* different L2 (we did not continue above):
			 * check whether we are still on the same chip */
			if ((full_costs[i].curr_cpu / cores_per_chip) ==
			    (full_costs[i].last_cpu / cores_per_chip)) {
				dprintf("same L3\n");
				/* same-L3 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		} else {
			dprintf("same chip\n");
			/* samel2 == NULL: check same-chip migration */
			if ((full_costs[i].curr_cpu / cores_per_chip) ==
			    (full_costs[i].last_cpu / cores_per_chip)) {

				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		}
		dprintf("offchip\n");
		/* if we get here, it must have been an off-chip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}
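
/*
 * Minimal end-to-end sketch of the analysis side (hypothetical: the
 * PM_COMMON_DEMO guard, the trace file name, and the 2-cores-per-L2 /
 * 4-cores-per-chip topology are assumptions, not part of this commit).
 * Build with -DPM_COMMON_DEMO to get a stand-alone binary.
 */
#ifdef PM_COMMON_DEMO
static struct full_ovd_plen full_costs[DATAPOINTS];
static struct ovd_plen preempt[DATAPOINTS], samel2[DATAPOINTS],
		       samechip[DATAPOINTS], offchip[DATAPOINTS];

int main(int argc, char **argv)
{
	int n, pcount, l2count, chipcount, offcount;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <trace file>\n", argv[0]);
		return 1;
	}

	if ((n = get_valid_ovd(argv[1], full_costs)) < 0)
		return 1;

	/* hypothetical topology: 2 cores per L2, 4 cores per chip */
	get_ovd_plen(full_costs, n, 2, 4,
		     preempt, &pcount, samel2, &l2count,
		     samechip, &chipcount, offchip, &offcount);

	printf("preempt: %d, same L2: %d, same chip: %d, off chip: %d\n",
	       pcount, l2count, chipcount, offcount);
	return 0;
}
#endif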