aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristopher Kenna <cjk@cs.unc.edu>2012-04-24 15:48:21 -0400
committerChristopher Kenna <cjk@cs.unc.edu>2012-04-24 15:48:21 -0400
commit339c2d488296e4d0c38bbd781307f95117e7ef66 (patch)
tree3980d1b1e6cb44d6f02328eeec1443580442d269
parente0732d7df71cd1db56b1ac7b4ad6132e6d1e00a8 (diff)
Color tools checkpoint.
-rw-r--r--.gitignore2
-rw-r--r--Makefile7
-rw-r--r--bin/colorbench.c3
-rw-r--r--bin/testcounters.c401
4 files changed, 410 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 255ac82..07d6c61 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,8 @@ rtspin
23cycles 23cycles
24measure_syscall 24measure_syscall
25colortest 25colortest
26colorbench
27testcounters
26 28
27# build system files 29# build system files
28.config 30.config
diff --git a/Makefile b/Makefile
index 7a95b91..1a48d8f 100644
--- a/Makefile
+++ b/Makefile
@@ -71,7 +71,7 @@ AR := ${CROSS_COMPILE}${AR}
71 71
72all = lib ${rt-apps} 72all = lib ${rt-apps}
73rt-apps = cycles base_task rt_launch rtspin release_ts measure_syscall \ 73rt-apps = cycles base_task rt_launch rtspin release_ts measure_syscall \
74 base_mt_task runtests colortest colorbench 74 base_mt_task runtests colortest colorbench testcounters
75 75
76.PHONY: all lib clean dump-config TAGS tags cscope help 76.PHONY: all lib clean dump-config TAGS tags cscope help
77 77
@@ -222,6 +222,9 @@ lib-colortest = -static
222obj-colorbench = colorbench.o color.o 222obj-colorbench = colorbench.o color.o
223lib-colorbench = -lpthread -lrt 223lib-colorbench = -lpthread -lrt
224 224
225obj-testcounters = testcounters.o
226lib-testcounters =
227
225# ############################################################################## 228# ##############################################################################
226# Build everything that depends on liblitmus. 229# Build everything that depends on liblitmus.
227 230
@@ -265,7 +268,7 @@ $(error Cannot build without access to the LITMUS^RT kernel source)
265endif 268endif
266 269
267kernel-unistd-hdrs := $(foreach file,${unistd-headers},${LITMUS_KERNEL}/$(file)) 270kernel-unistd-hdrs := $(foreach file,${unistd-headers},${LITMUS_KERNEL}/$(file))
268hdr-ok := $(shell egrep '\#include ["<]litmus/unistd' ${kernel-unistd-hdrs} ) 271hdr-ok := $(shell egrep '\#include .*litmus/unistd' ${kernel-unistd-hdrs} )
269ifeq ($(strip $(hdr-ok)),) 272ifeq ($(strip $(hdr-ok)),)
270$(info (!!) Could not find LITMUS^RT system calls in ${kernel-unistd-hdrs}.) 273$(info (!!) Could not find LITMUS^RT system calls in ${kernel-unistd-hdrs}.)
271$(error Your kernel headers do not seem to be LITMUS^RT headers) 274$(error Your kernel headers do not seem to be LITMUS^RT headers)
diff --git a/bin/colorbench.c b/bin/colorbench.c
index 0212d9c..cf0b927 100644
--- a/bin/colorbench.c
+++ b/bin/colorbench.c
@@ -14,7 +14,7 @@
14#include "color.h" 14#include "color.h"
15 15
16#define DEBUG 1 16#define DEBUG 1
17#define NR_LOOPS 1000 17#define NR_LOOPS 10
18 18
19/* pick ONE of these */ 19/* pick ONE of these */
20#define TIME_MONO 1 /* monotonic time */ 20#define TIME_MONO 1 /* monotonic time */
@@ -45,6 +45,7 @@
45 45
46#define PAGE_LOWER ((PAGE_SIZE - 1)) 46#define PAGE_LOWER ((PAGE_SIZE - 1))
47#define ARENA_INTS (arena_size / sizeof(int)) 47#define ARENA_INTS (arena_size / sizeof(int))
48#define ARENA_LINES (arena_size / LINE_SIZE)
48 49
49#define THREAD_CPU(t) (t * (NR_CPUS / nr_threads)) 50#define THREAD_CPU(t) (t * (NR_CPUS / nr_threads))
50 51
diff --git a/bin/testcounters.c b/bin/testcounters.c
new file mode 100644
index 0000000..207851a
--- /dev/null
+++ b/bin/testcounters.c
@@ -0,0 +1,401 @@
1#if 0
2/* done in Makefile */
3#define _GNU_SOURCE /* or _BSD_SOURCE or _SVID_SOURCE */
4#endif
5
6#include "asm/unistd.h" /* from kernel source tree */
7#include <unistd.h> /* for syscall */
8
9#include <sys/ioctl.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <sched.h>
13#include <stdint.h> /* rt_param needs uint32 */
14
15#include "../../litmus-rt/include/linux/perf_event.h"
16
17#include <litmus/rt_param.h> /* page size macro */
18
19/* from kernel tools/perf/perf.h */
20static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
21 int cpu, int group_fd, unsigned long flags)
22{
23 attr->size = sizeof(*attr);
24 return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
25}
26
/* Shorthand for the generic PERF_COUNT_HW_CACHE_* enum names. */
#define C(x) (PERF_COUNT_HW_CACHE_##x)

/*
 * Encode a PERF_TYPE_HW_CACHE config word: cache id in byte 0,
 * operation id in byte 1, result id in byte 2.
 */
#define ATTR_CONFIG_CACHE(cache, op, result) \
	(((C(cache)  & 0xffULL) << 0) | \
	 ((C(op)     & 0xffULL) << 8) | \
	 ((C(result) & 0xffULL) << 16))

/* Encode a raw event: event select in byte 0, unit mask in byte 1. */
#define ATTR_CONFIG(event, umask) \
	((((event) & 0xffULL) << 0) | \
	 (((umask) & 0xffULL) << 8))
36
37struct perf_event_attr perf_event_attr = {
38 .type = 0, /* set per initilized event */
39 .size = 0, /* set later */
40 .config = 0, /* set per initilized event */
41 { .sample_period = 0, }, /* is a counter, so no period */
42 .disabled = 0, /* event is enabled */
43 .inherit = 0, /* children don't inherit */
44 .pinned = 0, /* set per initilized event */
45 .exclusive = 0, /* set per initilized event */
46 .exclude_user = 0, /* don't count user */
47 .exclude_kernel = 0, /* ditto kernel */
48 .exclude_hv = 0, /* ditto hypervisor */
49 .exclude_idle = 0, /* don't count when idle */
50 .mmap = 0, /* include mmap data */
51 .comm = 0, /* include comm data */
52};
53
/* Geometry of the test machine ("Pound"): 4 CPUs, 8 MB, 16-way, 64 B lines. */
#define NR_CPUS 4
#define CACHE_SIZE_MB 8
#define ASSOC 16
#define LINE_SIZE 64
#define CACHE_SIZE (CACHE_SIZE_MB * 1024 * 1024)

/* arena size in bytes (alternate: 14/16 of the cache) */
//#define ARENA_SIZE (CACHE_SIZE * 14 / 16)
#define ARENA_SIZE (CACHE_SIZE * 1)

/* number of pages in arena */
#define ARENA_PAGES (ARENA_SIZE / PAGE_SIZE)

/* number of cache lines per page */
#define PAGE_LINES (PAGE_SIZE / LINE_SIZE)

/* number of cache lines in arena */
#define ARENA_LINES (ARENA_SIZE / LINE_SIZE)

/* number of integers in arena */
#define ARENA_INTS (ARENA_SIZE / sizeof(int))

/* number of integers in a page */
#define PAGE_INTS (PAGE_SIZE / sizeof(int))

/* number of integers in a cache line */
#define LINE_INTS (LINE_SIZE / sizeof(int))

/* convert a (page, line) pair into an index into the arena of ints */
#define PAGE_AND_LINE_TO_IDX(page, line) \
	(((page) * PAGE_INTS) + ((line) * LINE_INTS))
86
87
/*
 * Return a pseudo-random integer in [0, max).
 *
 * Not perfectly uniform (rand()'s range need not divide evenly by max),
 * but good enough for shuffling cache-line visit orders.  Declared
 * "static inline" rather than plain "inline": a C99 inline definition in
 * a .c file emits no external definition, so a non-inlined call (e.g. at
 * -O0) can fail to link.
 */
static inline int randrange(const int max)
{
	return (rand() / (RAND_MAX / max + 1));
}
93
/*
 * Fill items with a trivial cycle: every slot points at the next slot
 * and the final slot points back at slot 0.
 */
void sequential(int *items, const int len)
{
	int idx;

	for (idx = 0; idx < len; idx++)
		items[idx] = (idx + 1) % len;
}
100
/*
 * Sattolo's algorithm: permute 0..len-1 into a single cycle, so that
 * repeatedly following items[i] visits every slot exactly once before
 * returning to the start.
 */
void sattolo(int *items, const int len)
{
	int i, j, tmp;

	for (i = 0; i < len; i++)
		items[i] = i;
	for (i = len - 1; i > 0; i--) {
		/* 0 <= j < i */
		j = randrange(i);
		tmp = items[i];
		items[i] = items[j];
		items[j] = tmp;
	}
}
115
116/*
117 * Write the order to read the arena into the arena. Each page in the arena is
118 * read back, but the page is read in a random order to prevent the prefetcher
119 * from working.
120 */
121static void init_arena_page_line_order(int *arena, int *page_line_order)
122{
123 int cur_page;
124 for (cur_page = 0; cur_page < ARENA_PAGES; cur_page++) {
125 /* for each page in the arena */
126 int cur_line;
127 for (cur_line = 0; cur_line < PAGE_LINES; cur_line++) {
128 /* for each line in the page */
129 const int idx = PAGE_AND_LINE_TO_IDX(cur_page,
130 cur_line);
131 const int next_line = page_line_order[cur_line];
132 int next_idx = PAGE_AND_LINE_TO_IDX(cur_page,
133 next_line);
134
135 if (!next_line) {
136 /* special case: last line in the page */
137 if (cur_page < ARENA_PAGES - 1) {
138 /* arena has more pages: go to next */
139 next_idx = PAGE_AND_LINE_TO_IDX(
140 (cur_page + 1), 0);
141 } else {
142 /* the very last element */
143 next_idx = 0;
144 }
145 }
146 arena[idx] = next_idx;
147 }
148 }
149}
150
/*
 * Chase the index cycle stored in the arena once, starting at slot 0,
 * until it wraps back to 0.  perf_fd is unused; the last index is
 * returned so the compiler cannot optimize the walk away.
 */
static int loop_once(const int perf_fd, int *arena)
{
	int cur = 0, last;

	(void)perf_fd;
	do {
		cur = arena[cur];
		last = cur;
	} while (cur != 0);
	return last;
}
160
/* Pin the calling task to the given CPU; returns 0 on success. */
static int set_affinity(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	return sched_setaffinity(0, sizeof(mask), &mask);
}
168
169struct perf_fd {
170 int fd;
171 char *name;
172 enum perf_type_id type;
173 __u64 config;
174 __u64 exclusive : 1,
175 pinned : 1,
176 __reserved_1 : 62;
177};
178
179#define PERF_FD_EMPTY(p) \
180 ((p)->fd == 0 && (p)->name == NULL && \
181 (p)->type == 0 && (p)->config == 0)
182#define PERF_FD_NON_EMPTY(p) (!PERF_FD_EMPTY(p))
183
184
185#if 0
186/* these events are always zero */
187static struct perf_fd perf_fds[] = {
188 {
189 .fd = -1,
190 .name = "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
191 .type = PERF_TYPE_RAW,
192 .config = ATTR_CONFIG(0x0f, 0x08),
193 .exclusive = 0,
194 .pinned = 0,
195 },
196 {
197 .fd = -1,
198 .name = "MEM_UNCORE_RETIRED.REMOTE_DRAM",
199 .type = PERF_TYPE_RAW,
200 .config = ATTR_CONFIG(0x0f, 0x10),
201 .exclusive = 0, /* child events cannot be exclusive */
202 .pinned = 0, /* child events cannot be pinned */
203 },
204 { },
205};
206#endif
207
208static struct perf_fd perf_fds[] = {
209 /* first element is assumed to be group leader */
210#if 0
211 {
212 .fd = -1,
213 .name = "MEM_UNCORE_RETIRED.LOCAL_DRAM",
214 .type = PERF_TYPE_RAW,
215 .config = ATTR_CONFIG(0x0f, 0x20),
216 .exclusive = 1, /* group leader is scheduled exclusively */
217 .pinned = 1, /* group leader is pinnned to CPU (always on) */
218 },
219#endif
220 {
221 .fd = -1,
222 .name = "L2_RQSTS.PREFETCH_HIT",
223 .type = PERF_TYPE_RAW,
224 .config = ATTR_CONFIG(0x24, 0x40),
225#if 0
226 .exclusive = 0,
227 .pinned = 0,
228#endif
229 .exclusive = 1, /* group leader is scheduled exclusively */
230 .pinned = 1, /* group leader is pinnned to CPU (always on) */
231 },
232 {
233 .fd = -1,
234 .name = "L2_RQSTS.PREFETCH_MISS",
235 .type = PERF_TYPE_RAW,
236 .config = ATTR_CONFIG(0x24, 0x80),
237 .exclusive = 0,
238 .pinned = 0,
239 },
240 {
241 .fd = -1,
242 .name = "MEM_LOAD_RETIRED.L3_MISS",
243 .type = PERF_TYPE_RAW,
244 .config = ATTR_CONFIG(0xcb, 0x10),
245 .exclusive = 0,
246 .pinned = 0,
247 },
248 {
249 .fd = -1,
250 .name = "Off Core Response Counter",
251 .type = PERF_TYPE_HW_CACHE,
252 .config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
253#if 0
254 /* read misses */
255 .config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
256 /* write misses */
257 .config = ATTR_CONFIG_CACHE(LL, OP_WRITE, RESULT_MISS),
258 /* prefetch misses */
259 .config = ATTR_CONFIG_CACHE(LL, OP_PREFETCH, RESULT_MISS),
260#endif
261 .exclusive = 0,
262 .pinned = 0,
263 },
264 { },
265};
266
267
268static inline void events_ioctl(const int request)
269{
270 ioctl(perf_fds[0].fd, request);
271}
272
273static void do_read(double divide)
274{
275 struct perf_fd *perf_fd;
276 for (perf_fd = perf_fds; PERF_FD_NON_EMPTY(perf_fd); perf_fd++) {
277 __u64 perf_val;
278 ssize_t ret;
279 ret = read(perf_fd->fd, &perf_val, sizeof(perf_val));
280 if (0 >= ret)
281 printf("%50s: ERROR\n", perf_fd->name);
282 else
283 printf("%50s: %10.3f\n",
284 perf_fd->name, (perf_val / divide));
285 ioctl(perf_fd->fd, PERF_EVENT_IOC_RESET);
286 }
287}
288
289static void write_global_perf_attr(struct perf_fd *perf_fd)
290{
291 perf_event_attr.type = perf_fd->type;
292 perf_event_attr.config = perf_fd->config;
293 perf_event_attr.exclusive = perf_fd->exclusive;
294 perf_event_attr.pinned = perf_fd->pinned;
295}
296
297#define CPU 0
298static int setup_perf(void)
299{
300 /* cannot have pid == -1 and cpu == -1 */
301 const int perf_pid = -1; /* -1: all tasks, 0: this task */
302 const int perf_cpu = CPU; /* -1: all CPUs (follow task) */
303 struct perf_fd *perf_fd;
304 int err = 0;
305
306 for (perf_fd = perf_fds; PERF_FD_NON_EMPTY(perf_fd); perf_fd++) {
307 /* make a group whose leader is the zeroth element */
308 const int perf_group = perf_fds[0].fd;
309
310 /* setup the attributes to pass in */
311 write_global_perf_attr(perf_fd);
312
313 perf_fd->fd = sys_perf_event_open(&perf_event_attr, perf_pid,
314 perf_cpu, perf_group, 0);
315
316 if (0 > perf_fd->fd) {
317 fprintf(stderr, "could not setup %s\n", perf_fd->name);
318 err = -1;
319 goto out;
320 }
321 }
322out:
323 return err;
324}
325
326int main(int argc, char **argv)
327{
328
329 const int task_cpu = CPU;
330 int ret = 0, i;
331 int *arena, *page_line_order;
332
333 if (set_affinity(task_cpu)) {
334 fprintf(stderr, "could not set affinity\n");
335 ret = -1;
336 goto out;
337 }
338
339 arena = malloc(ARENA_SIZE);
340 if (!arena) {
341 fprintf(stderr, "could not allocate memory\n");
342 ret = -1;
343 goto out;
344 }
345
346 page_line_order = malloc(PAGE_LINES * sizeof(*page_line_order));
347 if (!page_line_order) {
348 fprintf(stderr, "could not allocate memory\n");
349 ret = -1;
350 goto out;
351 }
352
353 sattolo(page_line_order, PAGE_LINES);
354 //sequential(page_line_order, PAGE_LINES);
355 init_arena_page_line_order(arena, page_line_order);
356
357 if (setup_perf()) {
358 ret = -1;
359 goto out;
360 }
361
362 printf("arena_size: %d\n", ARENA_SIZE);
363 printf("arena_lines: %d\n", ARENA_LINES);
364
365 printf("initially\n");
366 do_read(1.0);
367
368 events_ioctl(PERF_EVENT_IOC_ENABLE);
369 loop_once(perf_fds[0].fd, arena);
370 events_ioctl(PERF_EVENT_IOC_DISABLE);
371 printf("after a loop\n");
372 do_read(1.0);
373
374 events_ioctl(PERF_EVENT_IOC_ENABLE);
375 loop_once(perf_fds[0].fd, arena);
376 events_ioctl(PERF_EVENT_IOC_DISABLE);
377 printf("after another loop\n");
378 do_read(1.0);
379
380 events_ioctl(PERF_EVENT_IOC_ENABLE);
381 loop_once(perf_fds[0].fd, arena);
382 events_ioctl(PERF_EVENT_IOC_DISABLE);
383 printf("after another loop\n");
384 do_read(1.0);
385
386 events_ioctl(PERF_EVENT_IOC_ENABLE);
387 loop_once(perf_fds[0].fd, arena);
388 events_ioctl(PERF_EVENT_IOC_DISABLE);
389 printf("after another loop\n");
390 do_read(1.0);
391
392 events_ioctl(PERF_EVENT_IOC_ENABLE);
393 for (i = 0; i < 100; i++)
394 loop_once(perf_fds[0].fd, arena);
395 events_ioctl(PERF_EVENT_IOC_DISABLE);
396 printf("after 100 loops\n");
397 do_read(100.0);
398
399out:
400 return ret;
401}