author	Christopher Kenna <cjk@cs.unc.edu>	2012-04-25 16:13:26 -0400
committer	Christopher Kenna <cjk@cs.unc.edu>	2012-04-25 16:13:26 -0400
commit	27a8f2048c6c9aebb2230d2903aa71e456c68a84 (patch)
tree	e7b6284e2429819c6a51fc44e722e81720a47b29
parent	339c2d488296e4d0c38bbd781307f95117e7ef66 (diff)
Add performance counter benchmark.
-rw-r--r--	Makefile	2
-rw-r--r--	bin/colorbench.c	323
-rw-r--r--	bin/perfcounters.c	187
-rw-r--r--	include/perfcounters.h	36
4 files changed, 427 insertions(+), 121 deletions(-)
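The core of the change shows up in bin/colorbench.c below: instead of timing a sequential sweep of the arena with clock_gettime(), every iteration now pointer-chases through the arena along per-page cycles built with Sattolo's algorithm, and hardware performance counters (programmed by the new bin/perfcounters.c) do the measuring. The standalone sketch that follows is not part of the patch; it only illustrates why that layout defeats the prefetcher. It builds a single cycle the same way sattolo() does and walks it with dependent loads; CHASE_LEN is an arbitrary illustrative size, and rand() % i stands in for the patch's randrange() helper.

/*
 * Standalone sketch (not part of the patch): Sattolo's algorithm yields a
 * single cycle over all indices, so a pointer chase starting at slot 0
 * visits every slot exactly once before it returns to 0.
 */
#include <stdio.h>
#include <stdlib.h>

#define CHASE_LEN 1024	/* arbitrary size, for illustration only */

static void sattolo_cycle(int *next, int len)
{
	int i;

	for (i = 0; i < len; i++)
		next[i] = i;
	/* i is now len; swap each slot with a strictly earlier one */
	while (1 < i--) {
		int j = rand() % i;	/* the patch uses randrange(i) here */
		int t = next[i];
		next[i] = next[j];
		next[j] = t;
	}
}

int main(void)
{
	int *next = malloc(CHASE_LEN * sizeof(*next));
	int steps = 0, cur = 0;

	if (!next)
		return 1;
	sattolo_cycle(next, CHASE_LEN);

	/* each load depends on the previous one, so prefetching cannot help */
	do {
		cur = next[cur];
		steps++;
	} while (cur != 0);

	printf("cycle length %d (expected %d)\n", steps, CHASE_LEN);
	free(next);
	return 0;
}

Because Sattolo's shuffle produces one cycle covering every index, the walk visits all CHASE_LEN slots exactly once before returning to 0, which is the property loop_once() relies on for termination.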
diff --git a/Makefile b/Makefile
index 1a48d8f..f6b865c 100644
--- a/Makefile
+++ b/Makefile
@@ -219,7 +219,7 @@ lib-measure_syscall = -lm
 obj-colortest = colortest.o color.o
 lib-colortest = -static
 
-obj-colorbench = colorbench.o color.o
+obj-colorbench = colorbench.o color.o perfcounters.o common.o
 lib-colorbench = -lpthread -lrt
 
 obj-testcounters = testcounters.o
diff --git a/bin/colorbench.c b/bin/colorbench.c
index cf0b927..de7189a 100644
--- a/bin/colorbench.c
+++ b/bin/colorbench.c
@@ -1,69 +1,83 @@
-#include <stdint.h> /* for uint16_t */
+#include <stdint.h> /* rt_param needs uint types */
 #include <stdlib.h>
 #include <limits.h>
 #include <pthread.h>
 #include <sched.h>
-#include <sys/mman.h>
 #include <errno.h>
 #include <stdio.h>
-#include <time.h>
 #include <unistd.h>
+#include <sys/mman.h> /* mlockall */
+#include <sys/ioctl.h>
 
 #include <litmus/rt_param.h>
 
+#include "perfcounters.h"
 #include "color.h"
+#include "litmus.h"
 
 #define DEBUG 1
-#define NR_LOOPS 10
-
-/* pick ONE of these */
-#define TIME_MONO 1 /* monotonic time */
-//#define TIME_THREAD 1 /* thread cpu time */
-
-/* Ludwig */
-//#define NR_CPUS 6
-//#define CACHE_SIZE_MB 12
-//#define ASSOC 12
+#define NR_LOOPS 100
 
 /* Pound */
 #define NR_CPUS 4
 #define CACHE_SIZE_MB 8
 #define ASSOC 16
-
-/* VM */
-//#define NR_CPUS 4
-//#define CACHE_SIZE_MB 4
-//#define ASSOC 16
-
 #define LINE_SIZE 64
+
 #define CACHE_SIZE (CACHE_SIZE_MB * 1024 * 1024)
 #define TOTAL_COLORS (CACHE_SIZE / ASSOC / PAGE_SIZE)
 
-#define ARENA_PAGES (arena_size / PAGE_SIZE)
+/* number of colors we actually use */
 #define USE_COLORS (TOTAL_COLORS >> color_shift)
+
+/* how many adjacent pages of the same color we need to allocate */
 #define CONTIG_COLORS (ARENA_PAGES / USE_COLORS)
 
+/* number of pages in arena */
+#define ARENA_PAGES (arena_size / PAGE_SIZE)
+
+/* page offset bit mask */
 #define PAGE_LOWER ((PAGE_SIZE - 1))
+
+/* number of integers in arena */
 #define ARENA_INTS (arena_size / sizeof(int))
+
+/* number of pages in arena */
+#define ARENA_PAGES (arena_size / PAGE_SIZE)
+
+/* number of cache lines in arena */
 #define ARENA_LINES (arena_size / LINE_SIZE)
 
+/* number of cache lines per page */
+#define PAGE_LINES (PAGE_SIZE / LINE_SIZE)
+
+/* number of integers in a page */
+#define PAGE_INTS (PAGE_SIZE / sizeof(int))
+
+/* number of integers in a cache line */
+#define LINE_INTS (LINE_SIZE / sizeof(int))
+
+/* convert page number and cache line number to an integer index */
+#define PAGE_AND_LINE_TO_IDX(page, line) \
+	(((page) * PAGE_INTS) + ((line) * LINE_INTS))
+
+/* what CPU a thread should run on */
 #define THREAD_CPU(t) (t * (NR_CPUS / nr_threads))
 
-#define FNAME_LEN 512
 struct pthread_state {
 	pthread_t thread;
 	int tid;
 	int retval;
 	struct color_ctrl_page *color_ctrl;
 	int *arena;
-	char fname[FNAME_LEN];
-	FILE *file;
 };
 
-static pthread_barrier_t start_barrier, end_barrier;
+static pthread_barrier_t barrier;
 static int nr_threads;
 static int arena_size;
 static int color_shift;
+static int *page_line_order;
+static struct perf_counter perf_counters[NR_CPUS * NR_PERF_COUNTERS];
 
 #ifdef DEBUG
 #define debug_print(fmt, args...) do { \
@@ -77,53 +91,115 @@ static int color_shift;
 #define debug_print_thread(ts, fmt, args...) do {} while (0)
 #endif
 
-static void mk_fname(struct pthread_state *state)
+/*
+ * Get a random number in [0, max). Not really a good way to do this.
+ */
+inline int randrange(const int max)
+{
+	return (rand() / (RAND_MAX / max + 1));
+}
+
+/*
+ * Write 1, 2, ..., n - 1, 0 into items.
+ */
+void sequential(int *items, const int len)
+{
+	int i;
+	for (i = 0; i < len; i++)
+		items[i] = (i + 1) % len;
+}
+
+/*
+ * Sattolo's algorithm makes a random cycle that includes all the elements
+ * in the items array.
+ */
+void sattolo(int *items, const int len)
 {
-	snprintf(state->fname, FNAME_LEN, "cache_size=%d_line=%d_assoc=%d_"
-			"colors=%d_color-shift=%d_use-colors=%d_arena-size=%d_"
-			"arena-pages=%d_contig-colors=%d_thread=%d_"
-			"cpu=%d.dat",
-			CACHE_SIZE, LINE_SIZE, ASSOC, TOTAL_COLORS,
-			color_shift, USE_COLORS, arena_size,
-			ARENA_PAGES, CONTIG_COLORS, state->tid,
-			THREAD_CPU(state->tid));
+	int i;
+	/* first set up 0, 1, ..., n - 1 */
+	for (i = 0; i < len; i++)
+		items[i] = i;
+	/* note: i is now n */
+	while (1 < i--) {
+		/* 0 <= j < i */
+		int t, j = randrange(i);
+		t = items[i];
+		items[i] = items[j];
+		items[j] = t;
+	}
+}
+
+/*
+ * Write the order to read the arena into the arena. Each page in the arena is
+ * read back, but the page is read in a random order to prevent the prefetcher
+ * from working.
+ *
+ * Starting at position 0 in the page_line_order means the cycle ends with 0.
+ * We use 0 in the arena to signify that we are done reading.
+ */
+static void init_arena_page_line_order(int *arena)
+{
+	int cur_page;
+	for (cur_page = 0; cur_page < ARENA_PAGES; cur_page++) {
+		/* for each page in the arena */
+		int cur_line;
+		for (cur_line = 0; cur_line < PAGE_LINES; cur_line++) {
+			/* for each line in the page */
+			const int idx = PAGE_AND_LINE_TO_IDX(cur_page,
+					cur_line);
+			const int next_line = page_line_order[cur_line];
+			int next_idx = PAGE_AND_LINE_TO_IDX(cur_page,
+					next_line);
+
+			if (0 == next_line) {
+				/* special case: cycle end */
+				if (cur_page < ARENA_PAGES - 1) {
+					/* arena has more pages: go to next */
+					next_idx = PAGE_AND_LINE_TO_IDX(
+							(cur_page + 1), 0);
+				} else {
+					/* the very last element */
+					next_idx = 0;
+				}
+			}
+			arena[idx] = next_idx;
+		}
+	}
 }
 
 static void setup_colors(struct pthread_state *state)
 {
-	int i, j;
-	for (i = 0; i < USE_COLORS; i++) {
-		for (j = 0; j < CONTIG_COLORS; j++)
-			state->color_ctrl->colors[CONTIG_COLORS * i + j] = i;
+	int color, i;
+	for (color = 0; color < USE_COLORS; color++) {
+		/* what color do we use */
+		for (i = 0; i < CONTIG_COLORS; i++) {
+			/* how many times do we use it */
+			const int idx = CONTIG_COLORS * color + i;
+			state->color_ctrl->colors[idx] = color;
+		}
 	}
+#if 0
+	for (i = 0; i < ARENA_PAGES; i++)
+		printf("%d: %2d\n", i, state->color_ctrl->colors[i]);
+#endif
 }
 
 static int loop_once(struct pthread_state *state)
 {
-	int i, j = 0;
-	for (i = 0; i < ARENA_INTS; i += 1)
-		j = state->arena[i];
+	int i = 0, j;
+	do {
+		i = state->arena[i];
+		j = i;
+	} while (i);
 	return j;
 }
 
 int thread_init(struct pthread_state *state)
 {
 	const int cpu = THREAD_CPU(state->tid);
-	cpu_set_t cpu_set;
 	int err = 0;
 
-	mk_fname(state);
-
-	state->file = fopen(state->fname, "w");
-	if (!state->file) {
-		debug_print_thread(state, "open file failed\n");
-		err = 1;
-		goto out;
-	}
-
-	CPU_ZERO(&cpu_set);
-	CPU_SET(cpu, &cpu_set);
-	err = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set);
+	err = be_migrate_to(cpu);
 	if (err) {
 		debug_print_thread(state, "set affinity failed\n");
 		goto out;
@@ -144,6 +220,8 @@ int thread_init(struct pthread_state *state)
 		goto out;
 	}
 
+	init_arena_page_line_order(state->arena);
+
 	err = mlockall(MCL_CURRENT|MCL_FUTURE);
 	if (err)
 	{
@@ -154,76 +232,39 @@ out:
 	return err;
 }
 
-#ifdef DEBUG
-#define CHECKPRINT(func, ...) do { \
-	int err = func(__VA_ARGS__); \
-	if (err) \
-		fprintf(stderr, "T%d: func failed\n", state->tid); \
-} while (0)
-#else
-#define CHECKPRINT(func, ...) func(__VA_ARGS__)
-#endif
-
-static void get_time(struct timespec *ts)
-{
-	int err;
-#if defined(TIME_MONO)
-	err = clock_gettime(CLOCK_MONOTONIC, ts);
-#elif defined(TIME_THREAD)
-	err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts);
-#endif
-	if (err)
-		fprintf(stderr, "clock_gettime failed!\n");
-}
-
-static unsigned long ts_to_long(const struct timespec *ts)
+static void change_counters(const int request)
 {
-	unsigned long x = (((unsigned long)ts->tv_sec) * 1e9) +
-		((unsigned long)ts->tv_nsec);
-	return x;
-}
-
-static unsigned long ts_difference(const struct timespec *start,
-		const struct timespec *end)
-{
-	unsigned long t1 = ts_to_long(start);
-	unsigned long t2 = ts_to_long(end);
-	return (t2 - t1);
+	int cpu;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		const int ctr_idx = cpu * NR_PERF_COUNTERS;
+		ioctl(perf_counters[ctr_idx].fd, request);
+	}
 }
 
 void * thread_start(void *data)
 {
 	struct pthread_state *state = (struct pthread_state*) data;
-	struct timespec start, end;
-	unsigned long time;
 	int i;
 
 	state->retval = thread_init(state);
 	if (state->retval)
 		goto out;
 
-	loop_once(state);
+	pthread_barrier_wait(&barrier);
 
-	for (i = 0; i < NR_LOOPS; ++i) {
-		pthread_barrier_wait(&start_barrier);
+	if (0 == state->tid)
+		change_counters(PERF_EVENT_IOC_ENABLE);
 
-		get_time(&start);
-		loop_once(state);
-		get_time(&end);
-
-		pthread_barrier_wait(&end_barrier);
+	pthread_barrier_wait(&barrier);
 
-		time = ts_difference(&start, &end);
-		fprintf(state->file, "%lu\n", time);
-		state->retval = fflush(state->file);
-		if (state->retval)
-			goto out;
-		state->retval = fsync(fileno(state->file));
-		if (state->retval)
-			goto out;
+	for (i = 0; i < NR_LOOPS; ++i) {
+		loop_once(state);
+		pthread_barrier_wait(&barrier);
 	}
 
-	fclose(state->file);
+	if (0 == state->tid)
+		change_counters(PERF_EVENT_IOC_DISABLE);
+
 out:
 	pthread_exit(&state->retval);
 }
@@ -249,7 +290,40 @@ out:
 	return err;
 }
 
-static struct pthread_state *pthread_state;
+static int setup_perf_counters(void)
+{
+	int cpu, ret = 0;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		const int idx = cpu * NR_PERF_COUNTERS;
+		const int group_leader = -1;
+		ret = setup_cpu_perf(cpu, group_leader, &perf_counters[idx]);
+		if (ret)
+			goto out;
+	}
+out:
+	return ret;
+}
+
+static void print_perf_counters(void)
+{
+	uint64_t val;
+	int err, cpu, i;
+	const char *name;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		printf("CPU %d\n", cpu);
+		for (i = 0; i < NR_PERF_COUNTERS; i++) {
+			const int idx = cpu * NR_PERF_COUNTERS + i;
+			name = get_perf_name(&perf_counters[idx]);
+			err = read_perf_counter(&perf_counters[idx], &val);
+			if (err)
+				printf("%50s: ERROR\n", name);
+			else
+				printf("%50s: %10.3f\n", name,
+						((double)val) / NR_LOOPS);
+		}
+	}
+}
 
 #define CHECK(fun, ...) { \
 	int err = fun(__VA_ARGS__); \
@@ -263,6 +337,8 @@ int main(int argc, char **argv)
 {
 	int ret = 0, err, i;
 	long int strtol_val;
+
+	struct pthread_state *pthread_state;
 	pthread_attr_t attr;
 
 	if (argc < 3) {
@@ -272,11 +348,11 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
-	err = xstrtol(argv[1], &strtol_val);
-	nr_threads = strtol_val;
-	err |= xstrtol(argv[2], &strtol_val);
-	arena_size = strtol_val;
-	err |= xstrtol(argv[3], &strtol_val);
+	err = xstrtol(argv[1], &strtol_val);
+	nr_threads = strtol_val;
+	err |= xstrtol(argv[2], &strtol_val);
+	arena_size = strtol_val;
+	err |= xstrtol(argv[3], &strtol_val);
 	color_shift = strtol_val;
 	if (err) {
 		debug_print("non-integer argument?\n");
@@ -289,20 +365,25 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
 	pthread_state = malloc(nr_threads * sizeof(*pthread_state));
-	if (!pthread_state) {
-		debug_print("could not malloc for state\n");
+	page_line_order = malloc(PAGE_LINES * sizeof(*page_line_order));
+	if (!pthread_state || !page_line_order) {
+		debug_print("could not malloc\n");
 		ret = 1;
 		goto out;
 	}
 
+	CHECK(setup_perf_counters);
+
 	CHECK(pthread_attr_init, &attr);
 	CHECK(pthread_attr_setdetachstate, &attr, PTHREAD_CREATE_JOINABLE);
-	CHECK(pthread_barrier_init, &start_barrier, NULL, nr_threads);
-	CHECK(pthread_barrier_init, &end_barrier, NULL, nr_threads);
+	CHECK(pthread_barrier_init, &barrier, NULL, nr_threads);
+
+	sattolo(page_line_order, PAGE_LINES);
+	//sequential(page_line_order, PAGE_LINES);
 
 	for (i = 0; i < nr_threads; i++) {
-		pthread_state[i].tid= i;
+		pthread_state[i].tid = i;
 		CHECK(pthread_create, &pthread_state[i].thread, &attr, thread_start,
 				(void*)&pthread_state[i]);
 	}
@@ -321,10 +402,12 @@ int main(int argc, char **argv)
 		}
 	}
 
-	CHECK(pthread_barrier_destroy, &start_barrier);
-	CHECK(pthread_barrier_destroy, &end_barrier);
+	CHECK(pthread_barrier_destroy, &barrier);
 	CHECK(pthread_attr_destroy, &attr);
 
+	printf("arena pages: %d\n", ARENA_PAGES);
+	print_perf_counters();
+
 out:
 	return ret;
 }
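The new bin/perfcounters.c that follows programs the PMU either through the generic PERF_TYPE_HW_CACHE events or through PERF_TYPE_RAW configs packed by its ATTR_CONFIG macro. As a standalone sanity check, not part of the patch, the sketch below reproduces that encoding: the Intel event select lands in bits 0-7 and the unit mask in bits 8-15, so L2_RQSTS.PREFETCH_HIT (event 0x24, umask 0x40) becomes raw config 0x4024, the kind of value perf accepts as a raw event.

/* Standalone check (not part of the patch): reproduce the ATTR_CONFIG
 * packing used by bin/perfcounters.c for PERF_TYPE_RAW events. */
#include <stdio.h>

#define ATTR_CONFIG(event, umask) \
	((((event) & 0xffULL) << 0) | \
	(((umask) & 0xffULL) << 8))

int main(void)
{
	/* event select in bits 0-7, unit mask in bits 8-15 */
	printf("L2_RQSTS.PREFETCH_HIT    -> 0x%llx\n",
			(unsigned long long)ATTR_CONFIG(0x24, 0x40));
	printf("L2_RQSTS.PREFETCH_MISS   -> 0x%llx\n",
			(unsigned long long)ATTR_CONFIG(0x24, 0x80));
	printf("MEM_LOAD_RETIRED.L3_MISS -> 0x%llx\n",
			(unsigned long long)ATTR_CONFIG(0xcb, 0x10));
	return 0;
}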
diff --git a/bin/perfcounters.c b/bin/perfcounters.c
new file mode 100644
index 0000000..402e047
--- /dev/null
+++ b/bin/perfcounters.c
@@ -0,0 +1,187 @@
+#include "asm/unistd.h" /* from kernel source tree */
+#include <unistd.h> /* for syscall */
+
+#include <sys/ioctl.h>
+
+#include "perfcounters.h"
+
+#define C(x) (PERF_COUNT_HW_CACHE_##x)
+#define ATTR_CONFIG_CACHE(cache, op, result) \
+	(((C(cache) & 0xffULL) << 0) | \
+	((C(op) & 0xffULL) << 8) | \
+	((C(result) & 0xffULL) << 16))
+
+#define ATTR_CONFIG(event, umask) \
+	((((event) & 0xffULL) << 0) | \
+	(((umask) & 0xffULL) << 8))
+
+static struct perf_event_attr perf_event_attr = {
+	.type = 0, /* set per initialized event */
+	.size = 0, /* set later */
+	.config = 0, /* set per initialized event */
+	{ .sample_period = 0, }, /* is a counter, so no period */
+	.disabled = 0, /* event is enabled */
+	.inherit = 0, /* children don't inherit */
+	.pinned = 0, /* set per initialized event */
+	.exclusive = 0, /* set per initialized event */
+	.exclude_user = 0, /* don't count user (when set) */
+	.exclude_kernel = 0, /* ditto kernel */
+	.exclude_hv = 0, /* ditto hypervisor */
+	.exclude_idle = 0, /* don't count when idle */
+	.mmap = 0, /* include mmap data */
+	.comm = 0, /* include comm data */
+};
+
+struct perf_counter_setup {
+	char *name;
+	enum perf_type_id type;
+	uint64_t config;
+};
+
+#if 0
+/* these events are always zero */
+static struct perf_fd perf_fds[] = {
+	{
+		.fd = -1,
+		.name = "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0x0f, 0x08),
+		.exclusive = 0,
+		.pinned = 0,
+	},
+	{
+		.fd = -1,
+		.name = "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0x0f, 0x10),
+		.exclusive = 0, /* child events cannot be exclusive */
+		.pinned = 0, /* child events cannot be pinned */
+	},
+	{ },
+};
+#endif
+
+static struct perf_counter_setup perf_setup[NR_PERF_COUNTERS] = {
+#if 0
+	{
+		.name = "MEM_UNCORE_RETIRED.LOCAL_DRAM",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0x0f, 0x20),
+	},
+#endif
+	{
+		.name = "L2_RQSTS.PREFETCH_HIT",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0x24, 0x40),
+	},
+	{
+		.name = "L2_RQSTS.PREFETCH_MISS",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0x24, 0x80),
+	},
+	{
+		.name = "MEM_LOAD_RETIRED.L3_MISS",
+		.type = PERF_TYPE_RAW,
+		.config = ATTR_CONFIG(0xcb, 0x10),
+	},
+	{
+		.name = "Off Core Response Counter",
+		.type = PERF_TYPE_HW_CACHE,
+		.config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
+#if 0
+		/* read misses */
+		.config = ATTR_CONFIG_CACHE(LL, OP_READ, RESULT_MISS),
+		/* write misses */
+		.config = ATTR_CONFIG_CACHE(LL, OP_WRITE, RESULT_MISS),
+		/* prefetch misses */
+		.config = ATTR_CONFIG_CACHE(LL, OP_PREFETCH, RESULT_MISS),
+#endif
+	},
+};
+
+
+/* from kernel tools/perf/perf.h */
+int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+		int cpu, int group_fd, unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/* make the temporary attributes shadow those in the perf_fd temporarily */
+static void write_global_perf_attr(const struct perf_counter_setup *p)
+{
+	perf_event_attr.type = p->type;
+	perf_event_attr.config = p->config;
+	perf_event_attr.pinned = 0;
+	perf_event_attr.exclusive = 0;
+}
+
+int setup_cpu_perf(const int cpu, const int group_leader,
+		struct perf_counter *perf_counters)
+{
+	const int perf_pid = -1; /* -1: all tasks */
+	int err = 0, i;
+
+	if (-1 == group_leader) {
+		/* first element determines the group for all others */
+		perf_counters->fd = -1;
+	}
+
+	for (i = 0; i < NR_PERF_COUNTERS; i++) {
+		int perf_group;
+
+		/* setup the attributes to pass in */
+		write_global_perf_attr(&perf_setup[i]);
+
+		if (0 == i && -1 == group_leader) {
+			/* but group leader is pinned and exclusive */
+			perf_event_attr.exclusive = 1;
+			perf_event_attr.pinned = 1;
+			perf_group = -1;
+		} else if (-1 == group_leader) {
+			/* not first counter, but no group passed in */
+			perf_group = perf_counters[0].fd;
+		}
+
+		perf_counters[i].fd = sys_perf_event_open(&perf_event_attr,
+				perf_pid, cpu, perf_group, 0);
+
+		if (0 > perf_counters[i].fd) {
+			err = -1;
+			goto out;
+		}
+
+		/* save the attributes in the user-visible configuration */
+		perf_counters[i].type = perf_setup[i].type;
+		perf_counters[i].config = perf_setup[i].config;
+	}
+out:
+	return err;
+}
+
+static inline int perf_setup_match(const struct perf_counter_setup* ps,
+		const struct perf_counter *pc)
+{
+	return (ps->type == pc->type && ps->config == pc->config);
+}
+
+const char* get_perf_name(const struct perf_counter* perf_counter)
+{
+	char *ret = NULL;
+	int i;
+
+	for (i = 0; i < NR_PERF_COUNTERS; i++) {
+		if (perf_setup_match(&perf_setup[i], perf_counter)) {
+			ret = perf_setup[i].name;
+			break;
+		}
+	}
+	return ret;
+}
+
+int read_perf_counter(const struct perf_counter* perf_counter, uint64_t *val)
+{
+	ssize_t ret = read(perf_counter->fd, val, sizeof(*val));
+	return (ret <= 0);
+}
diff --git a/include/perfcounters.h b/include/perfcounters.h
new file mode 100644
index 0000000..03f94fb
--- /dev/null
+++ b/include/perfcounters.h
@@ -0,0 +1,36 @@
+#ifndef PERFCOUNTERS_H
+#define PERFCOUNTERS_H
+
+#include <stdint.h>
+
+#include "../../litmus-rt/include/linux/perf_event.h"
+
+#define NR_PERF_COUNTERS 4
+
+/*
+ * Retain this information with a performance counter file descriptor.
+ */
+struct perf_counter {
+	int fd;
+	enum perf_type_id type;
+	uint64_t config;
+};
+
+
+/*
+ * Initialize a set of counters for a CPU.
+ *
+ * This is NOT thread safe!
+ *
+ * @cpu CPU
+ * @group_leader group leader fd, or -1 to make a new group
+ * @perf_counter array to fill in
+ * @return 0 or error
+ */
+int setup_cpu_perf(const int, const int, struct perf_counter*);
+
+const char* get_perf_name(const struct perf_counter*);
+
+int read_perf_counter(const struct perf_counter*, uint64_t*);
+
+#endif
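For reference, a minimal usage sketch of the interface declared above; it is not part of the patch. It mirrors what colorbench.c does, but for a single CPU and with error handling trimmed; the workload placeholder and the printf formatting are illustrative only.

/*
 * Usage sketch (not part of the patch): set up the counter group on CPU 0,
 * bracket a region of interest with the enable/disable ioctls on the group
 * leader (as colorbench.c does), then print the raw counts.
 */
#include <stdio.h>
#include <sys/ioctl.h>

#include "perfcounters.h"

int main(void)
{
	struct perf_counter counters[NR_PERF_COUNTERS];
	uint64_t val;
	int i;

	if (setup_cpu_perf(0, -1, counters))
		return 1;

	ioctl(counters[0].fd, PERF_EVENT_IOC_ENABLE);
	/* ... workload to be measured goes here ... */
	ioctl(counters[0].fd, PERF_EVENT_IOC_DISABLE);

	for (i = 0; i < NR_PERF_COUNTERS; i++) {
		const char *name = get_perf_name(&counters[i]);

		if (read_perf_counter(&counters[i], &val))
			printf("%s: ERROR\n", name);
		else
			printf("%s: %llu\n", name, (unsigned long long)val);
	}
	return 0;
}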