/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* WSS, CACHESIZE and DATAPOINTS may be given as command-line defines
 * when recompiling this test for a different WSS, CACHESIZE and (?) DATAPOINTS
 */
/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE values, recompiling this program at
 * each round (easier for memory management), but running all tests
 * without human intervention
 */
/* working set size, in KB (override at build time with -DWSS=<n>) */
#ifndef WSS
#define WSS 1024
#endif
/* Cache size, expressed in the same unit as WSS (see NUMWS below);
 * override at build time with -DCACHESIZE=<n>.
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 16MB
 * Pound: L2: 256KB, L3 8MB
 */
#ifndef CACHESIZE
#define CACHESIZE (6 * 1024)
#endif
/* number of measurements that can be stored
 * (override at build time with -DDATAPOINTS=<n>)
 */
#ifndef DATAPOINTS
#define DATAPOINTS 100000
#endif
/* The following macros (hopefully) don't need any modification */
/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT 64
/* ints per WSS.
 * Fully parenthesized so the macro behaves as a single operand inside
 * larger expressions (e.g. x / INTS_PER_WSS or x % INTS_PER_WSS).
 */
#define INTS_PER_WSS (((WSS) * 1024) / sizeof(int))
/* reads vs. writes ratio */
#define READRATIO 75
/* random seed */
#define SEEDVAL 12345
/* number of "working sets" to cycle through.
 * Fully parenthesized so that e.g. NUMWS * n multiplies the whole value
 * and not only the trailing "+ 2".
 */
#define NUMWS ((((CACHESIZE) * 2) / (WSS)) + 2)
/* runtime in milliseconds -- 15s */
#define SIMRUNTIME 15000
/* times to read warm memory to get accurate data */
#define REFTOTAL 3
/* nanoseconds per millisecond */
#define NS_PER_MS 1000000
/*
 * Full measurement sample.
 * serialize_data_entry() stores a reduced form of it on disk
 * (struct saved_data_entry).
 */
struct data_entry {
	unsigned long long timestamp;
	/*
	 * Kind of sample:
	 *   c C  cold cache access
	 *   h H  hot cache access
	 *   p P  preemption / migration
	 */
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	/* counters recorded with the sample; their exact meaning is defined
	 * by the code filling them in (not visible from this header)
	 */
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};
/*
 * Serializable data entry: the subset of struct data_entry members
 * that is kept in the saved form.
 */
struct saved_data_entry {
	char access_type;                     /* sample kind */
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};
/*
 * A (signed) long long covers a very long time span and should be
 * enough for our needs. The saved data is nevertheless kept as
 * unsigned long long; the conversion is left to the analysis code.
 */
struct full_ovd_plen {
	unsigned int curr_cpu;  /* "current" cpu */
	unsigned int last_cpu;  /* last "seen" cpu (curr != last --> migration) */
	long long ovd;          /* overhead */
	long long plen;         /* preemption length */
};
/* Overhead / preemption-length pair (no cpu information attached). */
struct ovd_plen {
	long long ovd;   /* overhead */
	long long plen;  /* preemption length */
};
/* write data_entry -> saved_data_entry on disk
 *
 * filename: path of the file to write
 * samples:  data_entry samples to convert and store
 * num:      presumably the number of entries in samples -- TODO confirm
 *
 * The meaning of the int return value is not visible from this header;
 * check the implementation before relying on it.
 * NOTE(review): filename is a non-const char * here, while the other
 * functions below take const char *; confirm whether the implementation
 * modifies it before unifying the signatures.
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);
/* read saved_data_entry from disk
 *
 * filename: path of the file to read
 * samples:  out parameter receiving the entries; presumably set to a
 *           buffer allocated by this function that the caller must
 *           release -- TODO confirm ownership against the implementation
 *
 * Presumably returns the number of entries read (or an error indication);
 * the exact convention is not visible from this header -- TODO confirm.
 */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);
/* get valid overhead from trace file
 *
 * filename:   path of the trace file
 * full_costs: destination for the extracted full_ovd_plen records;
 *             presumably a caller-provided array that must be large
 *             enough for all valid samples -- TODO confirm
 *
 * Presumably returns the number of valid records stored; the exact
 * convention is not visible from this header -- TODO confirm.
 */
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs);
/* get ovd and pm length for different cores configurations
 *
 * full_costs:     input records (overhead, preemption length, cpus)
 * num_samples:    presumably the number of records in full_costs
 * cores_per_l2:   cores sharing one L2 cache
 * cores_per_chip: cores on one chip
 *
 * Each (array, counter) pair below is an output. Judging by the names,
 * the records are presumably split into plain preemptions (preempt),
 * migrations within the same L2 (samel2), within the same chip (samechip)
 * and across chips (offchip), with the matching *count receiving the
 * number of entries written -- TODO confirm against the implementation.
 */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
unsigned int cores_per_l2, unsigned int cores_per_chip,
struct ovd_plen *preempt, int *pcount,
struct ovd_plen *samel2, int *l2count,
struct ovd_plen *samechip, int *chipcount,
struct ovd_plen *offchip, int *offcount);
#endif /* PM_COMMON_H */