author	Andrea Bastoni <bastoni@cs.unc.edu>	2010-03-02 16:29:39 -0500
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-03-02 16:29:39 -0500
commit	2fbd2511d983ca384d0aa9127c3953f6f7ba023f (patch)
tree	7cc8d7f8721555926b4169e0d7a9d74751b9c3b1
parent	36ec0f4d0e32a4302e44a4bc25af73cb1026229f (diff)
Add shared functionality for all components of the preemption/migration overhead measurement.

- Add data structures
- Add functions to read/write binary files
- Add initial data parsing
- Add an overhead "extractor" for multiple-core configurations
-rw-r--r--	include/pm_common.h	123
-rw-r--r--	pm_test/pm_common.c	405
2 files changed, 528 insertions, 0 deletions
diff --git a/include/pm_common.h b/include/pm_common.h
new file mode 100644
index 0000000..974dc9a
--- /dev/null
+++ b/include/pm_common.h
@@ -0,0 +1,123 @@
/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* WSS, CACHESIZE, and DATAPOINTS may be given as command-line defines
 * when recompiling this test for a different WSS, CACHESIZE, and number
 * of datapoints.
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE values, recompiling this program at
 * each round (easier for memory management), but running all tests
 * without human intervention.
 */
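/* For example, such a script might recompile with (hypothetical
 * invocation; no build files are part of this commit):
 *
 *	cc -DWSS=512 -DCACHESIZE=3072 -DDATAPOINTS=50000 ...
 *
 * to test a 512KB working set against a 3MB cache. The #ifndef guards
 * below exist so that such -D overrides do not clash with the defaults.
 */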
/* working set size, in KB */
#ifndef WSS
#define WSS 1024
#endif
/* Cache size:
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 16MB
 * Pound: L2: 256KB, L3 8MB
 */
#ifndef CACHESIZE
#define CACHESIZE (6 * 1024)
#endif

/* number of measurements that can be stored */
#ifndef DATAPOINTS
#define DATAPOINTS 100000
#endif

/* The following macros shouldn't (hopefully) need any modification */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT 64
/* ints per WSS */
#define INTS_PER_WSS ((WSS*1024)/(sizeof(int)))
/* reads vs. writes ratio */
#define READRATIO 75
/* random seed */
#define SEEDVAL 12345
/* number of "working sets" to cycle through */
#define NUMWS (((CACHESIZE*2)/WSS)+2)
/* runtime in milliseconds -- 15s */
#define SIMRUNTIME 15000
/* times to read warm memory to get accurate data */
#define REFTOTAL 3

#define NS_PER_MS 1000000

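/* Worked example with the defaults above (assuming sizeof(int) == 4):
 * WSS = 1024KB gives INTS_PER_WSS = 262144, and CACHESIZE = 6MB gives
 * NUMWS = ((6144*2)/1024) + 2 = 14 working sets to cycle through.
 */
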
struct data_entry {
	unsigned long long timestamp;

	/* cC cold cache access
	 * hH hot cache access
	 * pP preemption / migration
	 */
	char access_type;
	unsigned long long access_time;

	unsigned int cpu;
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};

/* serializable data entry */
struct saved_data_entry {
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};

/* A long long is a looot of time and should be enough for our needs.
 * However, we keep the saved data as unsigned long long and leave
 * the conversion to the analysis.
 */
struct full_ovd_plen {
	/* "current" cpu */
	unsigned int curr_cpu;
	/* last "seen" cpu (curr != last --> migration) */
	unsigned int last_cpu;
	/* overhead */
	long long ovd;
	/* preemption length */
	long long plen;
};

struct ovd_plen {
	long long ovd;
	long long plen;
};

/* write data_entry -> saved_data_entry on disk */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);
/* read saved_data_entry from disk */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);

/* get valid overheads from a trace file */
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs);
/* get overhead and p/m length for different core configurations */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);
#endif
diff --git a/pm_test/pm_common.c b/pm_test/pm_common.c
new file mode 100644
index 0000000..49ad2e7
--- /dev/null
+++ b/pm_test/pm_common.c
@@ -0,0 +1,405 @@
/*
 * pm_common.c
 *
 * Read / write data samples to a file in binary format.
 * Perform a first processing pass on the (possibly big) sample set.
 */
#include "pm_common.h"

#define BLOCK_MUL 500
#define SBLOCK_SIZE 1024

#define NUMHOTREADS 3
#define min(a,b) ((a)<(b)?(a):(b))

#ifdef DEBUG
#define dprintf(arg...) fprintf(stderr,arg)
#else
#define dprintf(arg...)
#endif

/* Simple sequential write to disk.
 * (Concurrent writes must be protected.)
 *
 * A saved_data_entry is ~21 B packed (typically 32 B with alignment
 * padding), so 100000 datapoints are on the order of 2-3 MB.
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num)
{
	int fd;
	int i, j;

	/* buffer some data in memory before writing */
	struct saved_data_entry to_save[SBLOCK_SIZE];

	fd = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0660);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* full blocks of SBLOCK_SIZE entries */
	for (i = 0; i < num / SBLOCK_SIZE; i++) {
		memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
		for (j = 0; j < SBLOCK_SIZE; j++) {
			to_save[j].access_type = samples[j].access_type;
			to_save[j].access_time = samples[j].access_time;
			to_save[j].cpu = samples[j].cpu;
			to_save[j].preemption_length =
				samples[j].preemption_length;
		}

		/* advance to the next block (j == SBLOCK_SIZE here) */
		samples = &samples[j];

		if (write(fd, to_save, sizeof(struct saved_data_entry) * SBLOCK_SIZE) == -1) {
			close(fd);
			perror("write");
			return -1;
		}
	}

	/* remainder (num % SBLOCK_SIZE entries) */
	memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
	for (j = 0; j < num % SBLOCK_SIZE; j++) {
		to_save[j].access_type = samples[j].access_type;
		to_save[j].access_time = samples[j].access_time;
		to_save[j].cpu = samples[j].cpu;
		to_save[j].preemption_length = samples[j].preemption_length;
	}

	if (write(fd, to_save, sizeof(struct saved_data_entry) * j) == -1) {
		close(fd);
		perror("write");
		return -1;
	}

	dprintf("Written %d entries\n", i*SBLOCK_SIZE + j);

	close(fd);
	return 0;
}
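
/*
 * Usage sketch (hypothetical caller; run_benchmark() is an assumed
 * name, not part of this commit): a benchmark fills a data_entry
 * array and flushes it once at the end of the run, e.g.
 *
 *	static struct data_entry trace[DATAPOINTS];
 *	int n = run_benchmark(trace);
 *	if (serialize_data_entry("wss1024.bin", trace, n) == -1)
 *		exit(1);
 */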

/*
 * Presumably, all data will be written on little-endian machines,
 * so the binary format is assumed to be little endian.
 *
 * Returns -1 on error,
 * the number of samples on success.
 */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples)
{
	int fd;
	int i, j;

	int num_samples, file_size;
	struct saved_data_entry block_read[BLOCK_MUL];

	int bytes_read;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* compute the file size */
	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	/* return to the start position */
	if (lseek(fd, 0, SEEK_SET) == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	num_samples = file_size / sizeof(struct saved_data_entry);
	dprintf("N entries: %d\n", num_samples);

	/* allocate memory for the saved_data_entry samples */
	*samples = malloc(num_samples * sizeof(struct saved_data_entry));
	if (*samples == NULL) {
		close(fd);
		perror("malloc");
		return -1;
	}

	/* read the whole file, in blocks of
	 * BLOCK_MUL * sizeof(struct saved_data_entry) bytes */
	j = 0;
	do {
		bytes_read = read(fd, block_read,
				  sizeof(struct saved_data_entry) * BLOCK_MUL);
		if (bytes_read == -1) {
			perror("read");
			close(fd);
			free(*samples);
			return -1;
		}

		for (i = 0; i < (bytes_read / sizeof(struct saved_data_entry)); i++, j++)
			(*samples)[j] = block_read[i];

	} while (bytes_read > 0);

	close(fd);

#ifdef DEBUG
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			(*samples)[i].access_type,
			(*samples)[i].access_time, (*samples)[i].cpu,
			(*samples)[i].preemption_length);
#endif
	return num_samples;
}

/*
 * get_valid_ovd(): get valid overheads from a trace file
 *
 * input:
 * @filename: input trace file name
 *
 * output:
 * @full_costs: array of all overheads and preemption lengths associated
 * with valid measures
 *
 * full_costs MUST be initialized before entering this function and MUST
 * be at least DATAPOINTS long.
 *
 * @return: number of valid measures read (the implicit "true" length of
 * the output array); < 0 on error.
 */
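/*
 * Illustrative trace windows, per the checks below (NUMHOTREADS == 3):
 *   H H H P -> valid: ovd = the 'P' access_time minus the minimum of
 *              the three 'H' reads
 *   H h H P -> still valid: only the 'H' reads before the first invalid
 *              'h' (here, just the first one) contribute to the minimum
 *   h H H P -> invalid window: no valid hot cost, so the 'P' is dropped
 * A lowercase 'p' sample is dropped even after a valid window.
 */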
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs)
{
	struct saved_data_entry *samples;
	/* total number of samples */
	int num_samples;
	/* number of valid samples */
	int scount = 0;

	int i;

	/* do we have a valid hot read? */
	int valid_hot_reads = 0;
	/* how many consecutive hot reads? */
	int total_hot_reads = 0;
	/* do we have a valid hot cost? */
	int valid_hot_cost = 0;
	/* are the hot reads valid so far? */
	int no_invalid_reads = 1;
	/* what is the last cpu seen so far? */
	unsigned int l_cpu = 0;

	unsigned long long hot_cost;

	/* if the output array isn't long enough, segfault early */
	memset(full_costs, 0, DATAPOINTS * sizeof(struct full_ovd_plen));

	if ((num_samples = read_sdata_entry(filename, &samples)) < 0) {
		fprintf(stderr, "Cannot read %s\n", filename);
		return -1;
	}

#ifdef DEBUG
	fprintf(stderr, "Start valid overhead\n");
	/* write this on stderr so we can redirect it to a different stream */
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			samples[i].access_type,
			samples[i].access_time, samples[i].cpu,
			samples[i].preemption_length);
	fprintf(stderr, "End valid overhead\n");
#endif
	hot_cost = samples[0].access_time;
	/* get valid overhead reads */
	for (i = 0; i < num_samples; i++) {

		if (samples[i].access_type == 'H' ||
		    samples[i].access_type == 'h') {
			/* NUMHOTREADS consecutive 'H' hot reads should
			 * (hopefully) appear. Take the minimum
			 * of all valid reads up to where the first
			 * invalid 'h' read appears.
			 */
			total_hot_reads++;
			if (no_invalid_reads && samples[i].access_type == 'H') {

				valid_hot_reads++;
				if (valid_hot_reads == 1) {
					hot_cost = samples[i].access_time;
					fprintf(stderr, "h1 = %llu\n", hot_cost);
				} else {
					hot_cost = min(hot_cost, samples[i].access_time);
					fprintf(stderr, "hm = %llu\n", hot_cost);
				}

			} else {
				/* an invalid 'h' read: ignore the hot
				 * reads in the rest of this window */
				no_invalid_reads = 0;
			}

			if (total_hot_reads == NUMHOTREADS) {
				/* check if we have a valid hot-read value */
				if (valid_hot_reads > 0)
					valid_hot_cost = 1;
				else
					valid_hot_cost = 0;

				/* reset flags */
				valid_hot_reads = 0;
				total_hot_reads = 0;
				no_invalid_reads = 1;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;

		} else if (samples[i].access_type == 'P' ||
			   samples[i].access_type == 'p') {

			/* This may be a preemption or a migration,
			 * but we do not care now: just report it
			 * if it happened after a valid hot read
			 * and the preemption measure is valid.
			 */
			if (valid_hot_cost && samples[i].access_type == 'P') {

				full_costs[scount].curr_cpu = samples[i].cpu;
				full_costs[scount].last_cpu = l_cpu;
				full_costs[scount].ovd = (long long)
					samples[i].access_time - hot_cost;

				fprintf(stderr, "hs = %llu\n", hot_cost);
				fprintf(stderr, "s1 = %llu\n", samples[i].access_time);
				fprintf(stderr, "o1 = %lld\n", full_costs[scount].ovd);

				full_costs[scount].plen = (long long)
					samples[i].preemption_length;

				dprintf("%u %u %lld %lld\n", full_costs[scount].curr_cpu,
					full_costs[scount].last_cpu,
					full_costs[scount].ovd, full_costs[scount].plen);

				scount++;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;
		}
	}

	dprintf("End of valid entries\n");

	free(samples);
	return scount;
}

/*
 * get_ovd_plen(): get overheads and preemption/migration lengths for
 * different core configurations
 *
 * For most architectures we can have at most 3 cache levels on the same
 * chip, plus off-chip migrations. In the worst case we need to measure:
 * [1] same-core preemption, [2] same-L2 migration,
 * [3] same-L3 (different L2, same chip) migration, [4] off-chip migration.
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 * (and in the output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @cores_per_chip: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * If samel2 is NULL, then there is no L3 and "same L2" is equivalent to
 * "same chip". cores_per_l2 should then equal cores_per_chip, but is
 * not used.
 */
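/*
 * Worked example (hypothetical topology with 2 cores per L2 and 4 cores
 * per chip): a migration from cpu 1 to cpu 3 gives 1/2 != 3/2 (different
 * L2) but 1/4 == 3/4 (same chip), so the sample is counted in @samechip;
 * cpu 1 to cpu 5 gives 1/4 != 5/4, so it lands in @offchip.
 */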
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	int i;
	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);

		if (full_costs[i].curr_cpu == full_costs[i].last_cpu) {
			dprintf("preempt\n");
			/* same core: preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;

			continue;
		}

		if (samel2) {
			dprintf("l2\n");

			if ((full_costs[i].curr_cpu / cores_per_l2) ==
			    (full_costs[i].last_cpu / cores_per_l2)) {
				dprintf("same L2\n");
				/* same-L2 migration */
				samel2[*l2count].ovd = full_costs[i].ovd;
				samel2[*l2count].plen = full_costs[i].plen;
				(*l2count)++;

				continue;
			}

			/* different L2 (we did not continue above):
			 * check whether we are still on the same chip */
			if ((full_costs[i].curr_cpu / cores_per_chip) ==
			    (full_costs[i].last_cpu / cores_per_chip)) {
				dprintf("same L3\n");
				/* same-L3 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		} else {
			dprintf("same chip\n");
			/* samel2 == NULL: check same-chip migration */
			if ((full_costs[i].curr_cpu / cores_per_chip) ==
			    (full_costs[i].last_cpu / cores_per_chip)) {

				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		}
		dprintf("offchip\n");
		/* if we get here, it must have been an off-chip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}
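
/*
 * Minimal end-to-end sketch of the analysis side (hypothetical: the
 * PM_COMMON_DEMO guard, the trace file name, and the 2-cores-per-L2 /
 * 4-cores-per-chip topology are assumptions, not part of this commit).
 * Build with -DPM_COMMON_DEMO to get a stand-alone binary.
 */
#ifdef PM_COMMON_DEMO
static struct full_ovd_plen full_costs[DATAPOINTS];
static struct ovd_plen preempt[DATAPOINTS], samel2[DATAPOINTS],
		       samechip[DATAPOINTS], offchip[DATAPOINTS];

int main(int argc, char **argv)
{
	int n, pcount, l2count, chipcount, offcount;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <trace file>\n", argv[0]);
		return 1;
	}

	if ((n = get_valid_ovd(argv[1], full_costs)) < 0)
		return 1;

	/* hypothetical topology: 2 cores per L2, 4 cores per chip */
	get_ovd_plen(full_costs, n, 2, 4,
		     preempt, &pcount, samel2, &l2count,
		     samechip, &chipcount, offchip, &offcount);

	printf("preempt: %d, same L2: %d, same chip: %d, off chip: %d\n",
	       pcount, l2count, chipcount, offcount);
	return 0;
}
#endif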