diff options
Diffstat (limited to 'bin/testcounters.c')
-rw-r--r-- | bin/testcounters.c | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/bin/testcounters.c b/bin/testcounters.c new file mode 100644 index 0000000..207851a --- /dev/null +++ b/bin/testcounters.c | |||
@@ -0,0 +1,401 @@ | |||
1 | #if 0 | ||
2 | /* done in Makefile */ | ||
3 | #define _GNU_SOURCE /* or _BSD_SOURCE or _SVID_SOURCE */ | ||
4 | #endif | ||
5 | |||
6 | #include "asm/unistd.h" /* from kernel source tree */ | ||
7 | #include <unistd.h> /* for syscall */ | ||
8 | |||
9 | #include <sys/ioctl.h> | ||
10 | #include <stdio.h> | ||
11 | #include <stdlib.h> | ||
12 | #include <sched.h> | ||
13 | #include <stdint.h> /* rt_param needs uint32 */ | ||
14 | |||
15 | #include "../../litmus-rt/include/linux/perf_event.h" | ||
16 | |||
17 | #include <litmus/rt_param.h> /* page size macro */ | ||
18 | |||
19 | /* from kernel tools/perf/perf.h */ | ||
/* from kernel tools/perf/perf.h */
/*
 * Thin wrapper around the raw perf_event_open(2) syscall (glibc exports
 * no wrapper for it).  Returns the new event fd, or -1 with errno set.
 */
static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
		int cpu, int group_fd, unsigned long flags)
{
	attr->size = sizeof(*attr);	/* kernel validates the ABI struct size */
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}
26 | |||
/* Shorthand for the PERF_COUNT_HW_CACHE_* enum constants. */
#define C(x) (PERF_COUNT_HW_CACHE_##x)

/*
 * Build a PERF_TYPE_HW_CACHE config word: cache id in bits 0-7,
 * operation in bits 8-15, result in bits 16-23 (see perf_event_open(2)).
 */
#define ATTR_CONFIG_CACHE(cache, op, result) \
	(((C(cache) & 0xffULL) << 0) |  \
	 ((C(op) & 0xffULL) << 8)    |  \
	 ((C(result) & 0xffULL) << 16))

/*
 * Build a PERF_TYPE_RAW config word: event select in bits 0-7,
 * unit mask in bits 8-15 (Intel raw event encoding).
 */
#define ATTR_CONFIG(event, umask) \
	((((event) & 0xffULL) << 0) | \
	 (((umask) & 0xffULL) << 8))
36 | |||
/*
 * Shared attribute template passed to sys_perf_event_open(); the
 * per-event fields are overwritten by write_global_perf_attr() before
 * each open.  All exclude_* bits are 0, so user, kernel, hypervisor and
 * idle activity are all counted.
 */
struct perf_event_attr perf_event_attr = {
	.type = 0,		/* overwritten per event */
	.size = 0,		/* filled in by sys_perf_event_open() */
	.config = 0,		/* overwritten per event */
	{ .sample_period = 0, },	/* counting mode: no sampling period */
	.disabled = 0,		/* event starts enabled */
	.inherit = 0,		/* children don't inherit the counter */
	.pinned = 0,		/* overwritten per event */
	.exclusive = 0,		/* overwritten per event */
	.exclude_user = 0,	/* 0 = user-mode activity IS counted */
	.exclude_kernel = 0,	/* 0 = kernel-mode activity IS counted */
	.exclude_hv = 0,	/* 0 = hypervisor activity IS counted */
	.exclude_idle = 0,	/* 0 = idle time IS counted */
	.mmap = 0,		/* no mmap tracking records */
	.comm = 0,		/* no comm (task name) records */
};
53 | |||
/* "Pound": cache geometry of the evaluation host -- assumed, confirm per machine */
#define NR_CPUS 4
#define CACHE_SIZE_MB 8
#define ASSOC 16
#define LINE_SIZE 64
#define CACHE_SIZE (CACHE_SIZE_MB * 1024 * 1024)

/* arena size in bytes (currently exactly one cache's worth) */
//#define ARENA_SIZE (CACHE_SIZE * 14 / 16)
#define ARENA_SIZE (CACHE_SIZE * 1)

/* number of pages in arena */
#define ARENA_PAGES (ARENA_SIZE / PAGE_SIZE)

/* number of cache lines per page */
#define PAGE_LINES (PAGE_SIZE / LINE_SIZE)

/* number of cache lines in arena */
#define ARENA_LINES (ARENA_SIZE / LINE_SIZE)

/* number of integers in arena */
#define ARENA_INTS (ARENA_SIZE / sizeof(int))

/* number of integers in a page */
#define PAGE_INTS (PAGE_SIZE / sizeof(int))

/* number of integers in a cache line */
#define LINE_INTS (LINE_SIZE / sizeof(int))

/* convert page number and cache line number to an integer index into the arena */
#define PAGE_AND_LINE_TO_IDX(page, line) \
	(((page) * PAGE_INTS) + ((line) * LINE_INTS))
86 | |||
87 | |||
/*
 * Return a pseudo-random int in [0, max).  Uses plain rand(), so the
 * distribution is only approximately uniform ("not really a good way
 * to do this") -- fine for shuffling a traversal order.
 *
 * Fix: file-scope `inline` without `static` gives C99 inline linkage,
 * which emits no out-of-line definition and can fail to link when the
 * compiler chooses not to inline; `static inline` is the portable form.
 */
static inline int randrange(const int max)
{
	return (rand() / (RAND_MAX / max + 1));
}
93 | |||
/*
 * Fill items[] with a sequential cycle: each slot points at the next
 * one and the final slot wraps back to slot 0.
 */
void sequential(int *items, const int len)
{
	int idx = 0;

	while (idx < len - 1) {
		items[idx] = idx + 1;
		idx++;
	}
	if (len > 0)
		items[len - 1] = 0;
}
100 | |||
/*
 * Sattolo's algorithm: shuffle items[] into a single cycle (every
 * element is reachable from every other by repeated indirection).
 */
void sattolo(int *items, const int len)
{
	int pos;

	/* start from the identity permutation */
	for (pos = 0; pos < len; pos++)
		items[pos] = pos;

	/* swap each slot with a strictly earlier one: guarantees one cycle */
	for (pos = len - 1; pos >= 1; pos--) {
		const int other = randrange(pos);	/* 0 <= other < pos */
		const int tmp = items[pos];

		items[pos] = items[other];
		items[other] = tmp;
	}
}
115 | |||
116 | /* | ||
117 | * Write the order to read the arena into the arena. Each page in the arena is | ||
118 | * read back, but the page is read in a random order to prevent the prefetcher | ||
119 | * from working. | ||
120 | */ | ||
121 | static void init_arena_page_line_order(int *arena, int *page_line_order) | ||
122 | { | ||
123 | int cur_page; | ||
124 | for (cur_page = 0; cur_page < ARENA_PAGES; cur_page++) { | ||
125 | /* for each page in the arena */ | ||
126 | int cur_line; | ||
127 | for (cur_line = 0; cur_line < PAGE_LINES; cur_line++) { | ||
128 | /* for each line in the page */ | ||
129 | const int idx = PAGE_AND_LINE_TO_IDX(cur_page, | ||
130 | cur_line); | ||
131 | const int next_line = page_line_order[cur_line]; | ||
132 | int next_idx = PAGE_AND_LINE_TO_IDX(cur_page, | ||
133 | next_line); | ||
134 | |||
135 | if (!next_line) { | ||
136 | /* special case: last line in the page */ | ||
137 | if (cur_page < ARENA_PAGES - 1) { | ||
138 | /* arena has more pages: go to next */ | ||
139 | next_idx = PAGE_AND_LINE_TO_IDX( | ||
140 | (cur_page + 1), 0); | ||
141 | } else { | ||
142 | /* the very last element */ | ||
143 | next_idx = 0; | ||
144 | } | ||
145 | } | ||
146 | arena[idx] = next_idx; | ||
147 | } | ||
148 | } | ||
149 | } | ||
150 | |||
/*
 * Chase the index chain written by init_arena_page_line_order() once
 * around the whole arena, touching every cache line.
 *
 * The copy into j (and returning it) keeps the loads from being
 * optimized away; since the loop exits when i == 0 and j == i, the
 * return value is always 0.
 */
static int loop_once(const int perf_fd, int *arena)
{
	int i = 0, j;

	(void)perf_fd;	/* unused; kept so all call sites stay uniform */

	do {
		i = arena[i];
		j = i;
	} while (i);
	return j;
}
160 | |||
/*
 * Pin the calling task to a single CPU so all measurements happen on
 * one core.  Returns 0 on success, -1 on error (per sched_setaffinity).
 */
static int set_affinity(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	return sched_setaffinity(0, sizeof(mask), &mask);
}
168 | |||
/*
 * One perf counter: the open fd plus everything needed to open it.
 */
struct perf_fd {
	int fd;			/* -1 before setup_perf(); event fd afterwards */
	char *name;		/* human-readable label for printouts */
	enum perf_type_id type;	/* PERF_TYPE_RAW, PERF_TYPE_HW_CACHE, ... */
	__u64 config;		/* event encoding built with ATTR_CONFIG*() */
	__u64 exclusive : 1,	/* copied into perf_event_attr.exclusive */
	      pinned : 1,	/* copied into perf_event_attr.pinned */
	      __reserved_1 : 62;
};

/* An all-zero element ({ }) terminates a perf_fd array. */
#define PERF_FD_EMPTY(p) \
	((p)->fd == 0 && (p)->name == NULL && \
	 (p)->type == 0 && (p)->config == 0)
#define PERF_FD_NON_EMPTY(p) (!PERF_FD_EMPTY(p))
183 | |||
184 | |||
#if 0
/* Disabled alternative event set, kept for reference:
 * these remote-cache/remote-DRAM events always read back zero
 * on this (presumably single-socket) setup. */
/* these events are always zero */
static struct perf_fd perf_fds[] = {
	{
		.fd = -1,
		.name = "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0x0f, 0x08),
		.exclusive = 0,
		.pinned = 0,
	},
	{
		.fd = -1,
		.name = "MEM_UNCORE_RETIRED.REMOTE_DRAM",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0x0f, 0x10),
		.exclusive = 0, /* child events cannot be exclusive */
		.pinned = 0, /* child events cannot be pinned */
	},
	{ },	/* terminator, see PERF_FD_EMPTY() */
};
#endif
207 | |||
/*
 * The counters this test opens, terminated by an all-zero entry.
 * The raw event/umask codes match the Intel event names given in
 * .name -- assumed Nehalem/Westmere encodings, confirm against the SDM.
 */
static struct perf_fd perf_fds[] = {
	/* first element is assumed to be group leader */
#if 0
	{
		.fd = -1,
		.name = "MEM_UNCORE_RETIRED.LOCAL_DRAM",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0x0f, 0x20),
		.exclusive = 1, /* group leader is scheduled exclusively */
		.pinned = 1, /* group leader is pinnned to CPU (always on) */
	},
#endif
	{
		.fd = -1,
		.name = "L2_RQSTS.PREFETCH_HIT",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0x24, 0x40),
#if 0
		.exclusive = 0,
		.pinned = 0,
#endif
		.exclusive = 1, /* group leader is scheduled exclusively */
		.pinned = 1, /* group leader is pinnned to CPU (always on) */
	},
	{
		.fd = -1,
		.name = "L2_RQSTS.PREFETCH_MISS",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0x24, 0x80),
		.exclusive = 0,
		.pinned = 0,
	},
	{
		.fd = -1,
		.name = "MEM_LOAD_RETIRED.L3_MISS",
		.type = PERF_TYPE_RAW,
		.config = ATTR_CONFIG(0xcb, 0x10),
		.exclusive = 0,
		.pinned = 0,
	},
	{
		/* NOTE(review): despite the name, this is the generic
		 * last-level-cache read-miss event, not an offcore
		 * response MSR event -- confirm intent */
		.fd = -1,
		.name = "Off Core Response Counter",
		.type = PERF_TYPE_HW_CACHE,
		.config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
#if 0
		/* read misses */
		.config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
		/* write misses */
		.config = ATTR_CONFIG_CACHE(LL, OP_WRITE, RESULT_MISS),
		/* prefetch misses */
		.config = ATTR_CONFIG_CACHE(LL, OP_PREFETCH, RESULT_MISS),
#endif
		.exclusive = 0,
		.pinned = 0,
	},
	{ },	/* terminator, see PERF_FD_EMPTY() */
};
266 | |||
267 | |||
268 | static inline void events_ioctl(const int request) | ||
269 | { | ||
270 | ioctl(perf_fds[0].fd, request); | ||
271 | } | ||
272 | |||
273 | static void do_read(double divide) | ||
274 | { | ||
275 | struct perf_fd *perf_fd; | ||
276 | for (perf_fd = perf_fds; PERF_FD_NON_EMPTY(perf_fd); perf_fd++) { | ||
277 | __u64 perf_val; | ||
278 | ssize_t ret; | ||
279 | ret = read(perf_fd->fd, &perf_val, sizeof(perf_val)); | ||
280 | if (0 >= ret) | ||
281 | printf("%50s: ERROR\n", perf_fd->name); | ||
282 | else | ||
283 | printf("%50s: %10.3f\n", | ||
284 | perf_fd->name, (perf_val / divide)); | ||
285 | ioctl(perf_fd->fd, PERF_EVENT_IOC_RESET); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | static void write_global_perf_attr(struct perf_fd *perf_fd) | ||
290 | { | ||
291 | perf_event_attr.type = perf_fd->type; | ||
292 | perf_event_attr.config = perf_fd->config; | ||
293 | perf_event_attr.exclusive = perf_fd->exclusive; | ||
294 | perf_event_attr.pinned = perf_fd->pinned; | ||
295 | } | ||
296 | |||
297 | #define CPU 0 | ||
298 | static int setup_perf(void) | ||
299 | { | ||
300 | /* cannot have pid == -1 and cpu == -1 */ | ||
301 | const int perf_pid = -1; /* -1: all tasks, 0: this task */ | ||
302 | const int perf_cpu = CPU; /* -1: all CPUs (follow task) */ | ||
303 | struct perf_fd *perf_fd; | ||
304 | int err = 0; | ||
305 | |||
306 | for (perf_fd = perf_fds; PERF_FD_NON_EMPTY(perf_fd); perf_fd++) { | ||
307 | /* make a group whose leader is the zeroth element */ | ||
308 | const int perf_group = perf_fds[0].fd; | ||
309 | |||
310 | /* setup the attributes to pass in */ | ||
311 | write_global_perf_attr(perf_fd); | ||
312 | |||
313 | perf_fd->fd = sys_perf_event_open(&perf_event_attr, perf_pid, | ||
314 | perf_cpu, perf_group, 0); | ||
315 | |||
316 | if (0 > perf_fd->fd) { | ||
317 | fprintf(stderr, "could not setup %s\n", perf_fd->name); | ||
318 | err = -1; | ||
319 | goto out; | ||
320 | } | ||
321 | } | ||
322 | out: | ||
323 | return err; | ||
324 | } | ||
325 | |||
326 | int main(int argc, char **argv) | ||
327 | { | ||
328 | |||
329 | const int task_cpu = CPU; | ||
330 | int ret = 0, i; | ||
331 | int *arena, *page_line_order; | ||
332 | |||
333 | if (set_affinity(task_cpu)) { | ||
334 | fprintf(stderr, "could not set affinity\n"); | ||
335 | ret = -1; | ||
336 | goto out; | ||
337 | } | ||
338 | |||
339 | arena = malloc(ARENA_SIZE); | ||
340 | if (!arena) { | ||
341 | fprintf(stderr, "could not allocate memory\n"); | ||
342 | ret = -1; | ||
343 | goto out; | ||
344 | } | ||
345 | |||
346 | page_line_order = malloc(PAGE_LINES * sizeof(*page_line_order)); | ||
347 | if (!page_line_order) { | ||
348 | fprintf(stderr, "could not allocate memory\n"); | ||
349 | ret = -1; | ||
350 | goto out; | ||
351 | } | ||
352 | |||
353 | sattolo(page_line_order, PAGE_LINES); | ||
354 | //sequential(page_line_order, PAGE_LINES); | ||
355 | init_arena_page_line_order(arena, page_line_order); | ||
356 | |||
357 | if (setup_perf()) { | ||
358 | ret = -1; | ||
359 | goto out; | ||
360 | } | ||
361 | |||
362 | printf("arena_size: %d\n", ARENA_SIZE); | ||
363 | printf("arena_lines: %d\n", ARENA_LINES); | ||
364 | |||
365 | printf("initially\n"); | ||
366 | do_read(1.0); | ||
367 | |||
368 | events_ioctl(PERF_EVENT_IOC_ENABLE); | ||
369 | loop_once(perf_fds[0].fd, arena); | ||
370 | events_ioctl(PERF_EVENT_IOC_DISABLE); | ||
371 | printf("after a loop\n"); | ||
372 | do_read(1.0); | ||
373 | |||
374 | events_ioctl(PERF_EVENT_IOC_ENABLE); | ||
375 | loop_once(perf_fds[0].fd, arena); | ||
376 | events_ioctl(PERF_EVENT_IOC_DISABLE); | ||
377 | printf("after another loop\n"); | ||
378 | do_read(1.0); | ||
379 | |||
380 | events_ioctl(PERF_EVENT_IOC_ENABLE); | ||
381 | loop_once(perf_fds[0].fd, arena); | ||
382 | events_ioctl(PERF_EVENT_IOC_DISABLE); | ||
383 | printf("after another loop\n"); | ||
384 | do_read(1.0); | ||
385 | |||
386 | events_ioctl(PERF_EVENT_IOC_ENABLE); | ||
387 | loop_once(perf_fds[0].fd, arena); | ||
388 | events_ioctl(PERF_EVENT_IOC_DISABLE); | ||
389 | printf("after another loop\n"); | ||
390 | do_read(1.0); | ||
391 | |||
392 | events_ioctl(PERF_EVENT_IOC_ENABLE); | ||
393 | for (i = 0; i < 100; i++) | ||
394 | loop_once(perf_fds[0].fd, arena); | ||
395 | events_ioctl(PERF_EVENT_IOC_DISABLE); | ||
396 | printf("after 100 loops\n"); | ||
397 | do_read(100.0); | ||
398 | |||
399 | out: | ||
400 | return ret; | ||
401 | } | ||