/* pm_task-load-no-ctrl-page/pm_common.h */

/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* WSS, CACHESIZE, DATAPOINTS may be given as command-line defines
 * when recompiling this test for different WSS, CACHESIZE and (?) datapoints
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE values, recompiling this program at
 * each round (easier for memory management), but running all tests
 * without human intervention
 */
/*
 * Tunable experiment parameters.
 *
 * Each of WSS, CACHESIZE and DATAPOINTS is wrapped in #ifndef so that a
 * value supplied on the compiler command line (e.g. -DWSS=2048) takes
 * precedence over the default below instead of being redefined by it.
 */

/* working set size, in KB */
#ifndef WSS
#define WSS	1024
#endif

/* Cache size (presumably in KB, like WSS -- it is divided by WSS below):
 * Niagara: L2: 3MB
 * Koruna: L2: 6MB every 2 cores
 * Ludwig: L2: 3MB every 2 cores, L3 16MB
 * Pound:  L2: 256KB, L3 8MB
 */
#ifndef CACHESIZE
#define CACHESIZE	(6 * 1024)
#endif

/* number of measurements that can be stored */
#ifndef DATAPOINTS
#define DATAPOINTS	100000
#endif

/* The following macros don't need (hopefully) any modification.
 *
 * Every expansion is fully parenthesized (including the uses of WSS and
 * CACHESIZE, which may come from the command line) so that the macros
 * can be used safely inside larger expressions such as `2 * NUMWS` or
 * `i % INTS_PER_WSS`.
 */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT	64
/* ints per WSS (WSS is in KB, hence the factor 1024); has type size_t */
#define	INTS_PER_WSS	(((WSS) * 1024) / sizeof(int))
/* reads vs. writes ratio */
#define READRATIO	75
/* random seed */
#define SEEDVAL		12345
/* number of "working sets" to cycle through */
#define NUMWS		((((CACHESIZE) * 2) / (WSS)) + 2)
/* runtime in milliseconds -- 15000 ms = 15s */
#define SIMRUNTIME	15000
/* times to read warm memory to get accurate data */
#define REFTOTAL	3

#define NS_PER_MS	1000000

/*
 * One measurement sample as held in memory.
 * An array of these is what serialize_data_entry() takes as input.
 */
struct data_entry {
	unsigned long long int timestamp;

	/*
	 * kind of access this sample describes:
	 *   c / C   cold cache access
	 *   h / H   hot cache access
	 *   p / P   preemption / migration
	 */
	char access_type;
	unsigned long long int access_time;

	/* NOTE(review): units and origin of the fields below are not
	 * visible in this header -- confirm against the code filling them
	 */
	unsigned cpu;
	unsigned long int job_count;
	unsigned long int sched_count;
	unsigned long int last_rt_task;
	unsigned long long int preemption_length;
};

/*
 * Serializable form of a data entry: it carries only the access_type,
 * access_time, cpu and preemption_length fields of struct data_entry.
 */
struct saved_data_entry {
	char access_type;
	unsigned long long int access_time;
	unsigned cpu;
	unsigned long long int preemption_length;
};

/*
 * Overhead / preemption-length pair together with the cpus involved.
 *
 * A signed long long offers plenty of range for the times stored here.
 * The saved data is nevertheless kept as unsigned long long; converting
 * between the two is left to the analysis code.
 */
struct full_ovd_plen {
	unsigned curr_cpu;	/* "current" cpu */
	unsigned last_cpu;	/* last "seen" cpu (curr != last --> migration) */
	long long int ovd;	/* overhead */
	long long int plen;	/* preemption length */
};

/*
 * Reduced form of struct full_ovd_plen: the same ovd / plen pair,
 * without the cpu fields.
 */
struct ovd_plen {
	long long int ovd;	/* overhead */
	long long int plen;	/* preemption length */
};

/*
 * Write data_entry samples to disk in saved_data_entry form.
 *
 * filename: file to write to
 * samples:  data_entry records to serialize
 * num:      presumably the number of records in samples -- TODO confirm
 *
 * The meaning of the int return value is defined by the implementation,
 * which is not visible in this header.
 */
int serialize_data_entry(char *filename,
			 struct data_entry *samples,
			 int num);
/*
 * Read saved_data_entry records back from disk.
 *
 * filename: file to read from
 * samples:  out-parameter through which the records are handed back;
 *           NOTE(review): allocation/ownership of *samples is decided by
 *           the implementation -- confirm who frees it
 *
 * The meaning of the int return value is defined by the implementation,
 * which is not visible in this header.
 */
int read_sdata_entry(const char *filename,
		     struct saved_data_entry **samples);

/*
 * Extract the valid overheads from a trace file.
 *
 * filename:   trace file to read
 * full_costs: destination for the resulting full_ovd_plen records;
 *             NOTE(review): required capacity is not visible here
 *
 * The meaning of the int return value is defined by the implementation,
 * which is not visible in this header.
 */
int get_valid_ovd(const char *filename,
		  struct full_ovd_plen *full_costs);
/*
 * Get overhead and preemption/migration length for different core
 * configurations.
 *
 * full_costs:      input full_ovd_plen records
 * num_samples:     presumably the number of records in full_costs
 * cores_per_l2,
 * cores_per_chip:  core topology parameters
 *
 * The remaining arguments come in (struct ovd_plen *, int *) pairs:
 * preempt/pcount, samel2/l2count, samechip/chipcount, offchip/offcount.
 * NOTE(review): judging by the names, each pair is an output array plus
 * its element count for one category -- confirm against the
 * implementation.
 */
void get_ovd_plen(struct full_ovd_plen *full_costs,
		  int num_samples,
		  unsigned int cores_per_l2,
		  unsigned int cores_per_chip,
		  struct ovd_plen *preempt,
		  int *pcount,
		  struct ovd_plen *samel2,
		  int *l2count,
		  struct ovd_plen *samechip,
		  int *chipcount,
		  struct ovd_plen *offchip,
		  int *offcount);
#endif