diff options
author | Christopher Kenna <cjk@cs.unc.edu> | 2012-04-25 16:13:26 -0400 |
---|---|---|
committer | Christopher Kenna <cjk@cs.unc.edu> | 2012-04-25 16:13:26 -0400 |
commit | 27a8f2048c6c9aebb2230d2903aa71e456c68a84 (patch) | |
tree | e7b6284e2429819c6a51fc44e722e81720a47b29 | |
parent | 339c2d488296e4d0c38bbd781307f95117e7ef66 (diff) |
Add performance counter benchmark.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | bin/colorbench.c | 323 | ||||
-rw-r--r-- | bin/perfcounters.c | 187 | ||||
-rw-r--r-- | include/perfcounters.h | 36 |
4 files changed, 427 insertions, 121 deletions
@@ -219,7 +219,7 @@ lib-measure_syscall = -lm | |||
219 | obj-colortest = colortest.o color.o | 219 | obj-colortest = colortest.o color.o |
220 | lib-colortest = -static | 220 | lib-colortest = -static |
221 | 221 | ||
222 | obj-colorbench = colorbench.o color.o | 222 | obj-colorbench = colorbench.o color.o perfcounters.o common.o |
223 | lib-colorbench = -lpthread -lrt | 223 | lib-colorbench = -lpthread -lrt |
224 | 224 | ||
225 | obj-testcounters = testcounters.o | 225 | obj-testcounters = testcounters.o |
diff --git a/bin/colorbench.c b/bin/colorbench.c index cf0b927..de7189a 100644 --- a/bin/colorbench.c +++ b/bin/colorbench.c | |||
@@ -1,69 +1,83 @@ | |||
1 | #include <stdint.h> /* for uint16_t */ | 1 | #include <stdint.h> /* rt_param needs uint types */ |
2 | #include <stdlib.h> | 2 | #include <stdlib.h> |
3 | #include <limits.h> | 3 | #include <limits.h> |
4 | #include <pthread.h> | 4 | #include <pthread.h> |
5 | #include <sched.h> | 5 | #include <sched.h> |
6 | #include <sys/mman.h> | ||
7 | #include <errno.h> | 6 | #include <errno.h> |
8 | #include <stdio.h> | 7 | #include <stdio.h> |
9 | #include <time.h> | ||
10 | #include <unistd.h> | 8 | #include <unistd.h> |
9 | #include <sys/mman.h> /* mlockall */ | ||
10 | #include <sys/ioctl.h> | ||
11 | 11 | ||
12 | #include <litmus/rt_param.h> | 12 | #include <litmus/rt_param.h> |
13 | 13 | ||
14 | #include "perfcounters.h" | ||
14 | #include "color.h" | 15 | #include "color.h" |
16 | #include "litmus.h" | ||
15 | 17 | ||
16 | #define DEBUG 1 | 18 | #define DEBUG 1 |
17 | #define NR_LOOPS 10 | 19 | #define NR_LOOPS 100 |
18 | |||
19 | /* pick ONE of these */ | ||
20 | #define TIME_MONO 1 /* monotonic time */ | ||
21 | //#define TIME_THREAD 1 /* thread cpu time */ | ||
22 | |||
23 | /* Ludwig */ | ||
24 | //#define NR_CPUS 6 | ||
25 | //#define CACHE_SIZE_MB 12 | ||
26 | //#define ASSOC 12 | ||
27 | 20 | ||
28 | /* Pound */ | 21 | /* Pound */ |
29 | #define NR_CPUS 4 | 22 | #define NR_CPUS 4 |
30 | #define CACHE_SIZE_MB 8 | 23 | #define CACHE_SIZE_MB 8 |
31 | #define ASSOC 16 | 24 | #define ASSOC 16 |
32 | |||
33 | /* VM */ | ||
34 | //#define NR_CPUS 4 | ||
35 | //#define CACHE_SIZE_MB 4 | ||
36 | //#define ASSOC 16 | ||
37 | |||
38 | #define LINE_SIZE 64 | 25 | #define LINE_SIZE 64 |
26 | |||
39 | #define CACHE_SIZE (CACHE_SIZE_MB * 1024 * 1024) | 27 | #define CACHE_SIZE (CACHE_SIZE_MB * 1024 * 1024) |
40 | #define TOTAL_COLORS (CACHE_SIZE / ASSOC / PAGE_SIZE) | 28 | #define TOTAL_COLORS (CACHE_SIZE / ASSOC / PAGE_SIZE) |
41 | 29 | ||
42 | #define ARENA_PAGES (arena_size / PAGE_SIZE) | 30 | /* number of colors we actually use */ |
43 | #define USE_COLORS (TOTAL_COLORS >> color_shift) | 31 | #define USE_COLORS (TOTAL_COLORS >> color_shift) |
32 | |||
33 | /* how many adjacent pages of the same color we need to allocate */ | ||
44 | #define CONTIG_COLORS (ARENA_PAGES / USE_COLORS) | 34 | #define CONTIG_COLORS (ARENA_PAGES / USE_COLORS) |
45 | 35 | ||
36 | /* number of pages in arena */ | ||
37 | #define ARENA_PAGES (arena_size / PAGE_SIZE) | ||
38 | |||
39 | /* page offset bit mask */ | ||
46 | #define PAGE_LOWER ((PAGE_SIZE - 1)) | 40 | #define PAGE_LOWER ((PAGE_SIZE - 1)) |
41 | |||
42 | /* number of integers in arena */ | ||
47 | #define ARENA_INTS (arena_size / sizeof(int)) | 43 | #define ARENA_INTS (arena_size / sizeof(int)) |
44 | |||
45 | /* number of pages in arena */ | ||
46 | #define ARENA_PAGES (arena_size / PAGE_SIZE) | ||
47 | |||
48 | /* number of cache lines in arena */ | ||
48 | #define ARENA_LINES (arena_size / LINE_SIZE) | 49 | #define ARENA_LINES (arena_size / LINE_SIZE) |
49 | 50 | ||
51 | /* number of cache lines per page */ | ||
52 | #define PAGE_LINES (PAGE_SIZE / LINE_SIZE) | ||
53 | |||
54 | /* number of integers in a page */ | ||
55 | #define PAGE_INTS (PAGE_SIZE / sizeof(int)) | ||
56 | |||
57 | /* number of integers in a cache line */ | ||
58 | #define LINE_INTS (LINE_SIZE / sizeof(int)) | ||
59 | |||
60 | /* convert page number and cache line number to an integer index */ | ||
61 | #define PAGE_AND_LINE_TO_IDX(page, line) \ | ||
62 | (((page) * PAGE_INTS) + ((line) * LINE_INTS)) | ||
63 | |||
64 | /* what CPU a thread should run on */ | ||
50 | #define THREAD_CPU(t) (t * (NR_CPUS / nr_threads)) | 65 | #define THREAD_CPU(t) (t * (NR_CPUS / nr_threads)) |
51 | 66 | ||
52 | #define FNAME_LEN 512 | ||
53 | struct pthread_state { | 67 | struct pthread_state { |
54 | pthread_t thread; | 68 | pthread_t thread; |
55 | int tid; | 69 | int tid; |
56 | int retval; | 70 | int retval; |
57 | struct color_ctrl_page *color_ctrl; | 71 | struct color_ctrl_page *color_ctrl; |
58 | int *arena; | 72 | int *arena; |
59 | char fname[FNAME_LEN]; | ||
60 | FILE *file; | ||
61 | }; | 73 | }; |
62 | 74 | ||
63 | static pthread_barrier_t start_barrier, end_barrier; | 75 | static pthread_barrier_t barrier; |
64 | static int nr_threads; | 76 | static int nr_threads; |
65 | static int arena_size; | 77 | static int arena_size; |
66 | static int color_shift; | 78 | static int color_shift; |
79 | static int *page_line_order; | ||
80 | static struct perf_counter perf_counters[NR_CPUS * NR_PERF_COUNTERS]; | ||
67 | 81 | ||
68 | #ifdef DEBUG | 82 | #ifdef DEBUG |
69 | #define debug_print(fmt, args...) do { \ | 83 | #define debug_print(fmt, args...) do { \ |
@@ -77,53 +91,115 @@ static int color_shift; | |||
77 | #define debug_print_thread(ts, fmt, args...) do {} while (0) | 91 | #define debug_print_thread(ts, fmt, args...) do {} while (0) |
78 | #endif | 92 | #endif |
79 | 93 | ||
80 | static void mk_fname(struct pthread_state *state) | 94 | /* |
95 | * Get a random number in [0, max). Not really a good way to do this. | ||
96 | */ | ||
static inline int randrange(const int max)
{
	/*
	 * `static` matters here: a plain C99/C11 `inline` definition emits no
	 * external symbol, so any call the compiler decides not to inline
	 * would be left unresolved at link time.
	 */
	if (max <= 0)
		return 0;	/* empty range; also avoids dividing by zero */

	/*
	 * Split rand()'s [0, RAND_MAX] into buckets of equal width. The
	 * divisor is strictly larger than RAND_MAX / max, so the quotient
	 * always stays below max.
	 */
	return (rand() / (RAND_MAX / max + 1));
}
101 | |||
/*
 * Fill items with the successor cycle 1, 2, ..., len - 1, 0: every slot holds
 * the index that follows it and the final slot wraps back around to 0.
 */
void sequential(int *items, const int len)
{
	int pos = 0;

	while (pos < len) {
		const int next = pos + 1;

		items[pos] = (next == len) ? 0 : next;
		pos = next;
	}
}
111 | |||
112 | /* | ||
113 | * Sattolo's algorithm makes a random cycle that includes all the elements | ||
114 | * in the items array. | ||
115 | */ | ||
116 | void sattolo(int *items, const int len) | ||
81 | { | 117 | { |
82 | snprintf(state->fname, FNAME_LEN, "cache_size=%d_line=%d_assoc=%d_" | 118 | int i; |
83 | "colors=%d_color-shift=%d_use-colors=%d_arena-size=%d_" | 119 | /* first set up 0, 1, ..., n - 1 */ |
84 | "arena-pages=%d_contig-colors=%d_thread=%d_" | 120 | for (i = 0; i < len; i++) |
85 | "cpu=%d.dat", | 121 | items[i] = i; |
86 | CACHE_SIZE, LINE_SIZE, ASSOC, TOTAL_COLORS, | 122 | /* note: i is now n */ |
87 | color_shift, USE_COLORS, arena_size, | 123 | while (1 < i--) { |
88 | ARENA_PAGES, CONTIG_COLORS, state->tid, | 124 | /* 0 <= j < i */ |
89 | THREAD_CPU(state->tid)); | 125 | int t, j = randrange(i); |
126 | t = items[i]; | ||
127 | items[i] = items[j]; | ||
128 | items[j] = t; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Write the order to read the arena into the arena. Each page in the arena is | ||
134 | * read back, but the page is read in a random order to prevent the prefetcher | ||
135 | * from working. | ||
136 | * | ||
137 | * Starting at position 0 in the page_line_order means the cycle ends with 0. | ||
138 | * We use 0 in the arena to signify that we are done reading. | ||
139 | */ | ||
140 | static void init_arena_page_line_order(int *arena) | ||
141 | { | ||
142 | int cur_page; | ||
143 | for (cur_page = 0; cur_page < ARENA_PAGES; cur_page++) { | ||
144 | /* for each page in the arena */ | ||
145 | int cur_line; | ||
146 | for (cur_line = 0; cur_line < PAGE_LINES; cur_line++) { | ||
147 | /* for each line in the page */ | ||
148 | const int idx = PAGE_AND_LINE_TO_IDX(cur_page, | ||
149 | cur_line); | ||
150 | const int next_line = page_line_order[cur_line]; | ||
151 | int next_idx = PAGE_AND_LINE_TO_IDX(cur_page, | ||
152 | next_line); | ||
153 | |||
154 | if (0 == next_line) { | ||
155 | /* special case: cycle end */ | ||
156 | if (cur_page < ARENA_PAGES - 1) { | ||
157 | /* arena has more pages: go to next */ | ||
158 | next_idx = PAGE_AND_LINE_TO_IDX( | ||
159 | (cur_page + 1), 0); | ||
160 | } else { | ||
161 | /* the very last element */ | ||
162 | next_idx = 0; | ||
163 | } | ||
164 | } | ||
165 | arena[idx] = next_idx; | ||
166 | } | ||
167 | } | ||
90 | } | 168 | } |
91 | 169 | ||
92 | static void setup_colors(struct pthread_state *state) | 170 | static void setup_colors(struct pthread_state *state) |
93 | { | 171 | { |
94 | int i, j; | 172 | int color, i; |
95 | for (i = 0; i < USE_COLORS; i++) { | 173 | for (color = 0; color < USE_COLORS; color++) { |
96 | for (j = 0; j < CONTIG_COLORS; j++) | 174 | /* what color do we use */ |
97 | state->color_ctrl->colors[CONTIG_COLORS * i + j] = i; | 175 | for (i = 0; i < CONTIG_COLORS; i++) { |
176 | /* how many times do we use it */ | ||
177 | const int idx = CONTIG_COLORS * color + i; | ||
178 | state->color_ctrl->colors[idx] = color; | ||
179 | } | ||
98 | } | 180 | } |
181 | #if 0 | ||
182 | for (i = 0; i < ARENA_PAGES; i++) | ||
183 | printf("%d: %2d\n", i, state->color_ctrl->colors[i]); | ||
184 | #endif | ||
99 | } | 185 | } |
100 | 186 | ||
101 | static int loop_once(struct pthread_state *state) | 187 | static int loop_once(struct pthread_state *state) |
102 | { | 188 | { |
103 | int i, j = 0; | 189 | int i = 0, j; |
104 | for (i = 0; i < ARENA_INTS; i += 1) | 190 | do { |
105 | j = state->arena[i]; | 191 | i = state->arena[i]; |
192 | j = i; | ||
193 | } while (i); | ||
106 | return j; | 194 | return j; |
107 | } | 195 | } |
108 | 196 | ||
109 | int thread_init(struct pthread_state *state) | 197 | int thread_init(struct pthread_state *state) |
110 | { | 198 | { |
111 | const int cpu = THREAD_CPU(state->tid); | 199 | const int cpu = THREAD_CPU(state->tid); |
112 | cpu_set_t cpu_set; | ||
113 | int err = 0; | 200 | int err = 0; |
114 | 201 | ||
115 | mk_fname(state); | 202 | err = be_migrate_to(cpu); |
116 | |||
117 | state->file = fopen(state->fname, "w"); | ||
118 | if (!state->file) { | ||
119 | debug_print_thread(state, "open file failed\n"); | ||
120 | err = 1; | ||
121 | goto out; | ||
122 | } | ||
123 | |||
124 | CPU_ZERO(&cpu_set); | ||
125 | CPU_SET(cpu, &cpu_set); | ||
126 | err = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set); | ||
127 | if (err) { | 203 | if (err) { |
128 | debug_print_thread(state, "set affinity failed\n"); | 204 | debug_print_thread(state, "set affinity failed\n"); |
129 | goto out; | 205 | goto out; |
@@ -144,6 +220,8 @@ int thread_init(struct pthread_state *state) | |||
144 | goto out; | 220 | goto out; |
145 | } | 221 | } |
146 | 222 | ||
223 | init_arena_page_line_order(state->arena); | ||
224 | |||
147 | err = mlockall(MCL_CURRENT|MCL_FUTURE); | 225 | err = mlockall(MCL_CURRENT|MCL_FUTURE); |
148 | if (err) | 226 | if (err) |
149 | { | 227 | { |
@@ -154,76 +232,39 @@ out: | |||
154 | return err; | 232 | return err; |
155 | } | 233 | } |
156 | 234 | ||
157 | #ifdef DEBUG | 235 | static void change_counters(const int request) |
158 | #define CHECKPRINT(func, ...) do { \ | ||
159 | int err = func(__VA_ARGS__); \ | ||
160 | if (err) \ | ||
161 | fprintf(stderr, "T%d: func failed\n", state->tid); \ | ||
162 | } while (0) | ||
163 | #else | ||
164 | #define CHECKPRINT(func, ...) func(__VA_ARGS__) | ||
165 | #endif | ||
166 | |||
167 | static void get_time(struct timespec *ts) | ||
168 | { | ||
169 | int err; | ||
170 | #if defined(TIME_MONO) | ||
171 | err = clock_gettime(CLOCK_MONOTONIC, ts); | ||
172 | #elif defined(TIME_THREAD) | ||
173 | err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts); | ||
174 | #endif | ||
175 | if (err) | ||
176 | fprintf(stderr, "clock_gettime failed!\n"); | ||
177 | } | ||
178 | |||
179 | static unsigned long ts_to_long(const struct timespec *ts) | ||
180 | { | 236 | { |
181 | unsigned long x = (((unsigned long)ts->tv_sec) * 1e9) + | 237 | int cpu; |
182 | ((unsigned long)ts->tv_nsec); | 238 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
183 | return x; | 239 | const int ctr_idx = cpu * NR_PERF_COUNTERS; |
184 | } | 240 | ioctl(perf_counters[ctr_idx].fd, request); |
185 | 241 | } | |
186 | static unsigned long ts_difference(const struct timespec *start, | ||
187 | const struct timespec *end) | ||
188 | { | ||
189 | unsigned long t1 = ts_to_long(start); | ||
190 | unsigned long t2 = ts_to_long(end); | ||
191 | return (t2 - t1); | ||
192 | } | 242 | } |
193 | 243 | ||
194 | void * thread_start(void *data) | 244 | void * thread_start(void *data) |
195 | { | 245 | { |
196 | struct pthread_state *state = (struct pthread_state*) data; | 246 | struct pthread_state *state = (struct pthread_state*) data; |
197 | struct timespec start, end; | ||
198 | unsigned long time; | ||
199 | int i; | 247 | int i; |
200 | 248 | ||
201 | state->retval = thread_init(state); | 249 | state->retval = thread_init(state); |
202 | if (state->retval) | 250 | if (state->retval) |
203 | goto out; | 251 | goto out; |
204 | 252 | ||
205 | loop_once(state); | 253 | pthread_barrier_wait(&barrier); |
206 | 254 | ||
207 | for (i = 0; i < NR_LOOPS; ++i) { | 255 | if (0 == state->tid) |
208 | pthread_barrier_wait(&start_barrier); | 256 | change_counters(PERF_EVENT_IOC_ENABLE); |
209 | 257 | ||
210 | get_time(&start); | 258 | pthread_barrier_wait(&barrier); |
211 | loop_once(state); | ||
212 | get_time(&end); | ||
213 | |||
214 | pthread_barrier_wait(&end_barrier); | ||
215 | 259 | ||
216 | time = ts_difference(&start, &end); | 260 | for (i = 0; i < NR_LOOPS; ++i) { |
217 | fprintf(state->file, "%lu\n", time); | 261 | loop_once(state); |
218 | state->retval = fflush(state->file); | 262 | pthread_barrier_wait(&barrier); |
219 | if (state->retval) | ||
220 | goto out; | ||
221 | state->retval = fsync(fileno(state->file)); | ||
222 | if (state->retval) | ||
223 | goto out; | ||
224 | } | 263 | } |
225 | 264 | ||
226 | fclose(state->file); | 265 | if (0 == state->tid) |
266 | change_counters(PERF_EVENT_IOC_DISABLE); | ||
267 | |||
227 | out: | 268 | out: |
228 | pthread_exit(&state->retval); | 269 | pthread_exit(&state->retval); |
229 | } | 270 | } |
@@ -249,7 +290,40 @@ out: | |||
249 | return err; | 290 | return err; |
250 | } | 291 | } |
251 | 292 | ||
252 | static struct pthread_state *pthread_state; | 293 | static int setup_perf_counters(void) |
294 | { | ||
295 | int cpu, ret = 0; | ||
296 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
297 | const int idx = cpu * NR_PERF_COUNTERS; | ||
298 | const int group_leader = -1; | ||
299 | ret = setup_cpu_perf(cpu, group_leader, &perf_counters[idx]); | ||
300 | if (ret) | ||
301 | goto out; | ||
302 | } | ||
303 | out: | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | static void print_perf_counters(void) | ||
308 | { | ||
309 | uint64_t val; | ||
310 | int err, cpu, i; | ||
311 | const char *name; | ||
312 | |||
313 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
314 | printf("CPU %d\n", cpu); | ||
315 | for (i = 0; i < NR_PERF_COUNTERS; i++) { | ||
316 | const int idx = cpu * NR_PERF_COUNTERS + i; | ||
317 | name = get_perf_name(&perf_counters[idx]); | ||
318 | err = read_perf_counter(&perf_counters[idx], &val); | ||
319 | if (err) | ||
320 | printf("%50s: ERROR\n", name); | ||
321 | else | ||
322 | printf("%50s: %10.3f\n", name, | ||
323 | ((double)val) / NR_LOOPS); | ||
324 | } | ||
325 | } | ||
326 | } | ||
253 | 327 | ||
254 | #define CHECK(fun, ...) { \ | 328 | #define CHECK(fun, ...) { \ |
255 | int err = fun(__VA_ARGS__); \ | 329 | int err = fun(__VA_ARGS__); \ |
@@ -263,6 +337,8 @@ int main(int argc, char **argv) | |||
263 | { | 337 | { |
264 | int ret = 0, err, i; | 338 | int ret = 0, err, i; |
265 | long int strtol_val; | 339 | long int strtol_val; |
340 | |||
341 | struct pthread_state *pthread_state; | ||
266 | pthread_attr_t attr; | 342 | pthread_attr_t attr; |
267 | 343 | ||
268 | if (argc < 3) { | 344 | if (argc < 3) { |
@@ -272,11 +348,11 @@ int main(int argc, char **argv) | |||
272 | goto out; | 348 | goto out; |
273 | } | 349 | } |
274 | 350 | ||
275 | err = xstrtol(argv[1], &strtol_val); | 351 | err = xstrtol(argv[1], &strtol_val); |
276 | nr_threads = strtol_val; | 352 | nr_threads = strtol_val; |
277 | err |= xstrtol(argv[2], &strtol_val); | 353 | err |= xstrtol(argv[2], &strtol_val); |
278 | arena_size = strtol_val; | 354 | arena_size = strtol_val; |
279 | err |= xstrtol(argv[3], &strtol_val); | 355 | err |= xstrtol(argv[3], &strtol_val); |
280 | color_shift = strtol_val; | 356 | color_shift = strtol_val; |
281 | if (err) { | 357 | if (err) { |
282 | debug_print("non-integer argument?\n"); | 358 | debug_print("non-integer argument?\n"); |
@@ -289,20 +365,25 @@ int main(int argc, char **argv) | |||
289 | goto out; | 365 | goto out; |
290 | } | 366 | } |
291 | 367 | ||
292 | pthread_state = malloc(nr_threads * sizeof(*pthread_state)); | 368 | pthread_state = malloc(nr_threads * sizeof(*pthread_state)); |
293 | if (!pthread_state) { | 369 | page_line_order = malloc(PAGE_LINES * sizeof(*page_line_order)); |
294 | debug_print("could not malloc for state\n"); | 370 | if (!pthread_state || !page_line_order) { |
371 | debug_print("could not malloc\n"); | ||
295 | ret = 1; | 372 | ret = 1; |
296 | goto out; | 373 | goto out; |
297 | } | 374 | } |
298 | 375 | ||
376 | CHECK(setup_perf_counters); | ||
377 | |||
299 | CHECK(pthread_attr_init, &attr); | 378 | CHECK(pthread_attr_init, &attr); |
300 | CHECK(pthread_attr_setdetachstate, &attr, PTHREAD_CREATE_JOINABLE); | 379 | CHECK(pthread_attr_setdetachstate, &attr, PTHREAD_CREATE_JOINABLE); |
301 | CHECK(pthread_barrier_init, &start_barrier, NULL, nr_threads); | 380 | CHECK(pthread_barrier_init, &barrier, NULL, nr_threads); |
302 | CHECK(pthread_barrier_init, &end_barrier, NULL, nr_threads); | 381 | |
382 | sattolo(page_line_order, PAGE_LINES); | ||
383 | //sequential(page_line_order, PAGE_LINES); | ||
303 | 384 | ||
304 | for (i = 0; i < nr_threads; i++) { | 385 | for (i = 0; i < nr_threads; i++) { |
305 | pthread_state[i].tid= i; | 386 | pthread_state[i].tid = i; |
306 | CHECK(pthread_create, &pthread_state[i].thread, &attr, thread_start, | 387 | CHECK(pthread_create, &pthread_state[i].thread, &attr, thread_start, |
307 | (void*)&pthread_state[i]); | 388 | (void*)&pthread_state[i]); |
308 | } | 389 | } |
@@ -321,10 +402,12 @@ int main(int argc, char **argv) | |||
321 | } | 402 | } |
322 | } | 403 | } |
323 | 404 | ||
324 | CHECK(pthread_barrier_destroy, &start_barrier); | 405 | CHECK(pthread_barrier_destroy, &barrier); |
325 | CHECK(pthread_barrier_destroy, &end_barrier); | ||
326 | CHECK(pthread_attr_destroy, &attr); | 406 | CHECK(pthread_attr_destroy, &attr); |
327 | 407 | ||
408 | printf("arena pages: %d\n", ARENA_PAGES); | ||
409 | print_perf_counters(); | ||
410 | |||
328 | out: | 411 | out: |
329 | return ret; | 412 | return ret; |
330 | } | 413 | } |
diff --git a/bin/perfcounters.c b/bin/perfcounters.c new file mode 100644 index 0000000..402e047 --- /dev/null +++ b/bin/perfcounters.c | |||
@@ -0,0 +1,187 @@ | |||
1 | #include "asm/unistd.h" /* from kernel source tree */ | ||
2 | #include <unistd.h> /* for syscall */ | ||
3 | |||
4 | #include <sys/ioctl.h> | ||
5 | |||
6 | #include "perfcounters.h" | ||
7 | |||
/* Expand a short token to the matching PERF_COUNT_HW_CACHE_* constant. */
#define C(x) (PERF_COUNT_HW_CACHE_##x)

/*
 * Pack a cache event description into a config word: cache id in bits 0-7,
 * operation in bits 8-15, result in bits 16-23.
 */
#define ATTR_CONFIG_CACHE(cache, op, result) \
	((C(cache) & 0xffULL) | \
	 ((C(op) & 0xffULL) << 8) | \
	 ((C(result) & 0xffULL) << 16))

/* Pack a raw config word: event in bits 0-7, umask in bits 8-15. */
#define ATTR_CONFIG(event, umask) \
	(((event) & 0xffULL) | \
	 (((umask) & 0xffULL) << 8))
17 | |||
18 | static struct perf_event_attr perf_event_attr = { | ||
19 | .type = 0, /* set per initilized event */ | ||
20 | .size = 0, /* set later */ | ||
21 | .config = 0, /* set per initilized event */ | ||
22 | { .sample_period = 0, }, /* is a counter, so no period */ | ||
23 | .disabled = 0, /* event is enabled */ | ||
24 | .inherit = 0, /* children don't inherit */ | ||
25 | .pinned = 0, /* set per initilized event */ | ||
26 | .exclusive = 0, /* set per initilized event */ | ||
27 | .exclude_user = 0, /* don't count user (when set) */ | ||
28 | .exclude_kernel = 0, /* ditto kernel */ | ||
29 | .exclude_hv = 0, /* ditto hypervisor */ | ||
30 | .exclude_idle = 0, /* don't count when idle */ | ||
31 | .mmap = 0, /* include mmap data */ | ||
32 | .comm = 0, /* include comm data */ | ||
33 | }; | ||
34 | |||
35 | struct perf_counter_setup { | ||
36 | char *name; | ||
37 | enum perf_type_id type; | ||
38 | uint64_t config; | ||
39 | }; | ||
40 | |||
41 | #if 0 | ||
42 | /* these events are always zero */ | ||
43 | static struct perf_fd perf_fds[] = { | ||
44 | { | ||
45 | .fd = -1, | ||
46 | .name = "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT", | ||
47 | .type = PERF_TYPE_RAW, | ||
48 | .config = ATTR_CONFIG(0x0f, 0x08), | ||
49 | .exclusive = 0, | ||
50 | .pinned = 0, | ||
51 | }, | ||
52 | { | ||
53 | .fd = -1, | ||
54 | .name = "MEM_UNCORE_RETIRED.REMOTE_DRAM", | ||
55 | .type = PERF_TYPE_RAW, | ||
56 | .config = ATTR_CONFIG(0x0f, 0x10), | ||
57 | .exclusive = 0, /* child events cannot be exclusive */ | ||
58 | .pinned = 0, /* child events cannot be pinned */ | ||
59 | }, | ||
60 | { }, | ||
61 | }; | ||
62 | #endif | ||
63 | |||
64 | static struct perf_counter_setup perf_setup[NR_PERF_COUNTERS] = { | ||
65 | #if 0 | ||
66 | { | ||
67 | .name = "MEM_UNCORE_RETIRED.LOCAL_DRAM", | ||
68 | .type = PERF_TYPE_RAW, | ||
69 | .config = ATTR_CONFIG(0x0f, 0x20), | ||
70 | }, | ||
71 | #endif | ||
72 | { | ||
73 | .name = "L2_RQSTS.PREFETCH_HIT", | ||
74 | .type = PERF_TYPE_RAW, | ||
75 | .config = ATTR_CONFIG(0x24, 0x40), | ||
76 | }, | ||
77 | { | ||
78 | .name = "L2_RQSTS.PREFETCH_MISS", | ||
79 | .type = PERF_TYPE_RAW, | ||
80 | .config = ATTR_CONFIG(0x24, 0x80), | ||
81 | }, | ||
82 | { | ||
83 | .name = "MEM_LOAD_RETIRED.L3_MISS", | ||
84 | .type = PERF_TYPE_RAW, | ||
85 | .config = ATTR_CONFIG(0xcb, 0x10), | ||
86 | }, | ||
87 | { | ||
88 | .name = "Off Core Response Counter", | ||
89 | .type = PERF_TYPE_HW_CACHE, | ||
90 | .config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS), | ||
91 | #if 0 | ||
92 | /* read misses */ | ||
93 | .config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS), | ||
94 | /* write misses */ | ||
95 | .config = ATTR_CONFIG_CACHE(LL, OP_WRITE, RESULT_MISS), | ||
96 | /* prefetch misses */ | ||
97 | .config = ATTR_CONFIG_CACHE(LL, OP_PREFETCH, RESULT_MISS), | ||
98 | #endif | ||
99 | }, | ||
100 | }; | ||
101 | |||
102 | |||
103 | /* from kernel tools/perf/perf.h */ | ||
104 | int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, | ||
105 | int cpu, int group_fd, unsigned long flags) | ||
106 | { | ||
107 | attr->size = sizeof(*attr); | ||
108 | return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); | ||
109 | } | ||
110 | |||
111 | /* make the temporary attributes shadow those in the perf_fd temporarially */ | ||
112 | static void write_global_perf_attr(const struct perf_counter_setup *p) | ||
113 | { | ||
114 | perf_event_attr.type = p->type; | ||
115 | perf_event_attr.config = p->config; | ||
116 | perf_event_attr.pinned = 0; | ||
117 | perf_event_attr.exclusive = 0; | ||
118 | } | ||
119 | |||
120 | int setup_cpu_perf(const int cpu, const int group_leader, | ||
121 | struct perf_counter *perf_counters) | ||
122 | { | ||
123 | const int perf_pid = -1; /* -1: all tasks */ | ||
124 | int err = 0, i; | ||
125 | |||
126 | if (-1 == group_leader) { | ||
127 | /* first element determines the group for all others */ | ||
128 | perf_counters->fd = -1; | ||
129 | } | ||
130 | |||
131 | for (i = 0; i < NR_PERF_COUNTERS; i++) { | ||
132 | int perf_group; | ||
133 | |||
134 | /* setup the attributes to pass in */ | ||
135 | write_global_perf_attr(&perf_setup[i]); | ||
136 | |||
137 | if (0 == i && -1 == group_leader) { | ||
138 | /* but group leader is pinned and exclusive */ | ||
139 | perf_event_attr.exclusive = 1; | ||
140 | perf_event_attr.pinned = 1; | ||
141 | perf_group = -1; | ||
142 | } else if (-1 == group_leader) { | ||
143 | /* not first counter, but no group passed in */ | ||
144 | perf_group = perf_counters[0].fd; | ||
145 | } | ||
146 | |||
147 | perf_counters[i].fd = sys_perf_event_open(&perf_event_attr, | ||
148 | perf_pid, cpu, perf_group, 0); | ||
149 | |||
150 | if (0 > perf_counters[i].fd) { | ||
151 | err = -1; | ||
152 | goto out; | ||
153 | } | ||
154 | |||
155 | /* save the attributes in the user-visible configuration */ | ||
156 | perf_counters[i].type = perf_setup[i].type; | ||
157 | perf_counters[i].config = perf_setup[i].config; | ||
158 | } | ||
159 | out: | ||
160 | return err; | ||
161 | } | ||
162 | |||
163 | static inline int perf_setup_match(const struct perf_counter_setup* ps, | ||
164 | const struct perf_counter *pc) | ||
165 | { | ||
166 | return (ps->type == pc->type && ps->config == pc->config); | ||
167 | } | ||
168 | |||
169 | const char* get_perf_name(const struct perf_counter* perf_counter) | ||
170 | { | ||
171 | char *ret = NULL; | ||
172 | int i; | ||
173 | |||
174 | for (i = 0; i < NR_PERF_COUNTERS; i++) { | ||
175 | if (perf_setup_match(&perf_setup[i], perf_counter)) { | ||
176 | ret = perf_setup[i].name; | ||
177 | break; | ||
178 | } | ||
179 | } | ||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | int read_perf_counter(const struct perf_counter* perf_counter, uint64_t *val) | ||
184 | { | ||
185 | ssize_t ret = read(perf_counter->fd, val, sizeof(*val)); | ||
186 | return (ret <= 0); | ||
187 | } | ||
diff --git a/include/perfcounters.h b/include/perfcounters.h new file mode 100644 index 0000000..03f94fb --- /dev/null +++ b/include/perfcounters.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #ifndef PERFCOUNTERS_H | ||
2 | #define PERFCOUNTERS_H | ||
3 | |||
4 | #include <stdint.h> | ||
5 | |||
6 | #include "../../litmus-rt/include/linux/perf_event.h" | ||
7 | |||
8 | #define NR_PERF_COUNTERS 4 | ||
9 | |||
10 | /* | ||
11 | * Retain this information with a performance counter file descriptor. | ||
12 | */ | ||
13 | struct perf_counter { | ||
14 | int fd; | ||
15 | enum perf_type_id type; | ||
16 | uint64_t config; | ||
17 | }; | ||
18 | |||
19 | |||
/*
 * Initialize a set of NR_PERF_COUNTERS counters for a CPU.
 *
 * This is NOT thread safe!
 *
 * @cpu            CPU the counters are opened on
 * @group_leader   -1 to make a new group led by the first counter;
 *                 otherwise identifies an existing group to join
 *                 (NOTE(review): perf_event_open() expects a group file
 *                 descriptor here, not a PID - confirm)
 * @perf_counters  array of at least NR_PERF_COUNTERS entries to fill in
 * @return         0 or error
 */
int setup_cpu_perf(const int cpu, const int group_leader,
		struct perf_counter *perf_counters);
31 | |||
/*
 * Name of the configured event that matches the counter's type and config,
 * or NULL when there is none.
 */
const char *get_perf_name(const struct perf_counter *perf_counter);
33 | |||
/*
 * Read the counter's current value into *val.
 * Returns 0 on success and non-zero when the read fails.
 */
int read_perf_counter(const struct perf_counter *perf_counter, uint64_t *val);
35 | |||
36 | #endif | ||