include/pm_common.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* WSS, CACHESIZE, DATAPOINTS may be given as commandline define
 * when ricompiling this test for different WSS, CACHESIZE and (?) datapoints
 * ATM only WSS can be passed through scons building mechanism
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle though
 * different WSS and CACHESIZE, recompiling this program at
 * each round (easier for memory management), but running all test
 * without human intervention
 */
/*
 * default working set size, in KB
 * non-default WSS are taken from the test script (-DWSS=...)
 */
#ifndef WSS
#define WSS	3072
#endif
/* Cache size:
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 12MB
 * Pound:  L2: 256KB, L3 8MB
 */
#define CACHESIZE	(12 * 1024)

/* number of measurements that can be stored per single pm_task */
#define DATAPOINTS	100000

/* The following macro don't need (hopefully) any modification */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT	64
/* ints per WSS */
#define	INTS_PER_WSS	(WSS*1024)/(sizeof(int))
/* reads vs. writes ratio */
#define READRATIO	75
/* random seed */
#define SEEDVAL		12345
/* number of "working sets" to cycle through */
#define NUMWS		((CACHESIZE*2)/WSS)+2
/* runtime in milliseconds -- 60s*/
#define SIMRUNTIME	60000
/* times to read warm memory to get accurate data */
/* preliminary experiments on Ludwig shows that we can safely set
 * this to just 2 iteration (first and second 'H' access are ~ equal)
 * (it was 3)
 */
#define REFTOTAL	2

#define NS_PER_MS	1000000

struct data_entry {
	unsigned long long timestamp;

	/* cC cold cache access
	 * hH hot cache access
	 * pP preeption / migration
	 */
	char access_type;
	unsigned long long access_time;

	unsigned int cpu;
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};

/* serializable data entry */
struct saved_data_entry {
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};

/* long long is a looot of time and should be enough for our needs
 * However we keep the saved data in ull and leave to the analysis
 * dealing with the conversion
 */
struct full_ovd_plen {
	/* "current" cpu */
	unsigned int curr_cpu;
	/* last "seen" cpu (curr != last --> migration) */
	unsigned int last_cpu;
	/* overhead */
	long long ovd;
	/* preemption length */
	long long plen;
};

struct ovd_plen {
	long long ovd;
	long long plen;
};

/* write data_entry -> saved_data_entry on disk */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);
/* read saved_data_entry from disk */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);

/* get valid overhead from trace file */
int get_valid_ovd(const char *filename, struct full_ovd_plen **full_costs,
		int wss, int tss);

/* get ovd and pm length for different cores configurations (on uma xeon) */
/* Watch out for different topologies:
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 */
void get_ovd_plen_umaxeon(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int num_phys_cpu,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);

/* get ovd and pm length for different cores configurations */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		 unsigned int cores_per_l2, unsigned int cores_per_chip,
		 struct ovd_plen *preempt, int *pcount,
		 struct ovd_plen *samel2, int *l2count,
		 struct ovd_plen *samechip, int *chipcount,
		 struct ovd_plen *offchip, int *offcount);
#endif