aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2011-02-14 04:20:01 -0500
committerIngo Molnar <mingo@elte.hu>2011-02-16 07:30:48 -0500
commit023695d96ee06f36cf5014e286edcd623e9fb847 (patch)
treeff7483b7a1aa0cfd5de95475ed059822d2a35499
parente5d1367f17ba6a6fed5fd8b74e4d5720923e0c25 (diff)
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups (cgroups) for both perf stat and perf record. It is possible to monitor multiple cgroups in parallel. There is one cgroup per event. The cgroups to monitor are passed via a new -G option followed by a comma separated list of cgroup names.

The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool finds the corresponding directory in the cgroup filesystem and opens it. It then passes that file descriptor to the kernel.

Example:

$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1

 Performance counter stats for 'sleep 1':

      2,368,667,414  cycles                   test1
      2,369,661,459  cycles
      <not counted>  cycles                   test2

        1.001856890  seconds time elapsed

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt11
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/builtin-record.c9
-rw-r--r--tools/perf/builtin-stat.c40
-rw-r--r--tools/perf/util/cgroup.c178
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/evsel.c16
-rw-r--r--tools/perf/util/evsel.h2
9 files changed, 276 insertions, 10 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e032716c839b..5a520f825295 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -137,6 +137,17 @@ Do not update the builid cache. This saves some overhead in situations
137where the information in the perf.data file (which includes buildids) 137where the information in the perf.data file (which includes buildids)
138is sufficient. 138is sufficient.
139 139
140-G name,...::
141--cgroup name,...::
142monitor only in the container (cgroup) called "name". This option is available only
143in per-cpu (system-wide, -a) mode. The cgroup filesystem must be mounted. All threads belonging to
144container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
145can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
146to first event, second cgroup to second event and so on. It is possible to provide
147an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
148corresponding events, i.e., they always refer to events defined earlier on the command
149line.
150
140SEE ALSO 151SEE ALSO
141-------- 152--------
142linkperf:perf-stat[1], linkperf:perf-list[1] 153linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b6da7affbbee..918cc38ee6d1 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -83,6 +83,17 @@ This option is only valid in system-wide mode.
83print counts using a CSV-style output to make it easy to import directly into 83print counts using a CSV-style output to make it easy to import directly into
84spreadsheets. Columns are separated by the string specified in SEP. 84spreadsheets. Columns are separated by the string specified in SEP.
85 85
86-G name,...::
87--cgroup name,...::
88monitor only in the container (cgroup) called "name". This option is available only
89in per-cpu (system-wide, -a) mode. The cgroup filesystem must be mounted. All threads belonging to
90container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
91can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
92to first event, second cgroup to second event and so on. It is possible to provide
93an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
94corresponding events, i.e., they always refer to events defined earlier on the command
95line.
96
86EXAMPLES 97EXAMPLES
87-------- 98--------
88 99
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 94f73abdc56a..bc4d9bf8a556 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -442,6 +442,7 @@ LIB_H += util/pstack.h
442LIB_H += util/cpumap.h 442LIB_H += util/cpumap.h
443LIB_H += util/top.h 443LIB_H += util/top.h
444LIB_H += $(ARCH_INCLUDE) 444LIB_H += $(ARCH_INCLUDE)
445LIB_H += util/cgroup.h
445 446
446LIB_OBJS += $(OUTPUT)util/abspath.o 447LIB_OBJS += $(OUTPUT)util/abspath.o
447LIB_OBJS += $(OUTPUT)util/alias.o 448LIB_OBJS += $(OUTPUT)util/alias.o
@@ -496,6 +497,7 @@ LIB_OBJS += $(OUTPUT)util/probe-event.o
496LIB_OBJS += $(OUTPUT)util/util.o 497LIB_OBJS += $(OUTPUT)util/util.o
497LIB_OBJS += $(OUTPUT)util/xyarray.o 498LIB_OBJS += $(OUTPUT)util/xyarray.o
498LIB_OBJS += $(OUTPUT)util/cpumap.o 499LIB_OBJS += $(OUTPUT)util/cpumap.o
500LIB_OBJS += $(OUTPUT)util/cgroup.o
499 501
500BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o 502BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
501 503
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 12e0e41696d9..a4aaadcb4c8b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -807,6 +807,9 @@ const struct option record_options[] = {
807 "do not update the buildid cache"), 807 "do not update the buildid cache"),
808 OPT_BOOLEAN('B', "no-buildid", &no_buildid, 808 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
809 "do not collect buildids in perf.data"), 809 "do not collect buildids in perf.data"),
810 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
811 "monitor event in cgroup name only",
812 parse_cgroups),
810 OPT_END() 813 OPT_END()
811}; 814};
812 815
@@ -835,6 +838,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
835 write_mode = WRITE_FORCE; 838 write_mode = WRITE_FORCE;
836 } 839 }
837 840
841 if (nr_cgroups && !system_wide) {
842 fprintf(stderr, "cgroup monitoring only available in"
843 " system-wide mode\n");
844 usage_with_options(record_usage, record_options);
845 }
846
838 symbol__init(); 847 symbol__init();
839 848
840 if (no_buildid_cache || no_buildid) 849 if (no_buildid_cache || no_buildid)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 806a9998fcd5..21c025222496 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -390,6 +390,9 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
390 390
391 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); 391 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
392 392
393 if (evsel->cgrp)
394 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
395
393 if (csv_output) 396 if (csv_output)
394 return; 397 return;
395 398
@@ -420,6 +423,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
420 423
421 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); 424 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
422 425
426 if (evsel->cgrp)
427 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
428
423 if (csv_output) 429 if (csv_output)
424 return; 430 return;
425 431
@@ -460,9 +466,17 @@ static void print_counter_aggr(struct perf_evsel *counter)
460 int scaled = counter->counts->scaled; 466 int scaled = counter->counts->scaled;
461 467
462 if (scaled == -1) { 468 if (scaled == -1) {
463 fprintf(stderr, "%*s%s%-24s\n", 469 fprintf(stderr, "%*s%s%*s",
464 csv_output ? 0 : 18, 470 csv_output ? 0 : 18,
465 "<not counted>", csv_sep, event_name(counter)); 471 "<not counted>",
472 csv_sep,
473 csv_output ? 0 : -24,
474 event_name(counter));
475
476 if (counter->cgrp)
477 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
478
479 fputc('\n', stderr);
466 return; 480 return;
467 } 481 }
468 482
@@ -487,7 +501,6 @@ static void print_counter_aggr(struct perf_evsel *counter)
487 fprintf(stderr, " (scaled from %.2f%%)", 501 fprintf(stderr, " (scaled from %.2f%%)",
488 100 * avg_running / avg_enabled); 502 100 * avg_running / avg_enabled);
489 } 503 }
490
491 fprintf(stderr, "\n"); 504 fprintf(stderr, "\n");
492} 505}
493 506
@@ -505,14 +518,18 @@ static void print_counter(struct perf_evsel *counter)
505 ena = counter->counts->cpu[cpu].ena; 518 ena = counter->counts->cpu[cpu].ena;
506 run = counter->counts->cpu[cpu].run; 519 run = counter->counts->cpu[cpu].run;
507 if (run == 0 || ena == 0) { 520 if (run == 0 || ena == 0) {
508 fprintf(stderr, "CPU%*d%s%*s%s%-24s", 521 fprintf(stderr, "CPU%*d%s%*s%s%*s",
509 csv_output ? 0 : -4, 522 csv_output ? 0 : -4,
510 evsel_list->cpus->map[cpu], csv_sep, 523 evsel_list->cpus->map[cpu], csv_sep,
511 csv_output ? 0 : 18, 524 csv_output ? 0 : 18,
512 "<not counted>", csv_sep, 525 "<not counted>", csv_sep,
526 csv_output ? 0 : -24,
513 event_name(counter)); 527 event_name(counter));
514 528
515 fprintf(stderr, "\n"); 529 if (counter->cgrp)
530 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
531
532 fputc('\n', stderr);
516 continue; 533 continue;
517 } 534 }
518 535
@@ -529,7 +546,7 @@ static void print_counter(struct perf_evsel *counter)
529 100.0 * run / ena); 546 100.0 * run / ena);
530 } 547 }
531 } 548 }
532 fprintf(stderr, "\n"); 549 fputc('\n', stderr);
533 } 550 }
534} 551}
535 552
@@ -642,6 +659,9 @@ static const struct option options[] = {
642 "disable CPU count aggregation"), 659 "disable CPU count aggregation"),
643 OPT_STRING('x', "field-separator", &csv_sep, "separator", 660 OPT_STRING('x', "field-separator", &csv_sep, "separator",
644 "print counts with custom separator"), 661 "print counts with custom separator"),
662 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
663 "monitor event in cgroup name only",
664 parse_cgroups),
645 OPT_END() 665 OPT_END()
646}; 666};
647 667
@@ -682,9 +702,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
682 if (run_count <= 0) 702 if (run_count <= 0)
683 usage_with_options(stat_usage, options); 703 usage_with_options(stat_usage, options);
684 704
685 /* no_aggr is for system-wide only */ 705 /* no_aggr, cgroup are for system-wide only */
686 if (no_aggr && !system_wide) 706 if ((no_aggr || nr_cgroups) && !system_wide) {
707 fprintf(stderr, "both cgroup and no-aggregation "
708 "modes only available in system-wide mode\n");
709
687 usage_with_options(stat_usage, options); 710 usage_with_options(stat_usage, options);
711 }
688 712
689 /* Set attrs and nr_counters if no event is selected and !null_run */ 713 /* Set attrs and nr_counters if no event is selected and !null_run */
690 if (!null_run && !evsel_list->nr_entries) { 714 if (!null_run && !evsel_list->nr_entries) {
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000000..9fea75535221
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,178 @@
1#include "util.h"
2#include "../perf.h"
3#include "parse-options.h"
4#include "evsel.h"
5#include "cgroup.h"
6#include "debugfs.h" /* MAX_PATH, STR() */
7#include "evlist.h"
8
9int nr_cgroups;
10
11static int
12cgroupfs_find_mountpoint(char *buf, size_t maxlen)
13{
14 FILE *fp;
15 char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
16 char *token, *saved_ptr;
17 int found = 0;
18
19 fp = fopen("/proc/mounts", "r");
20 if (!fp)
21 return -1;
22
23 /*
24 * in order to handle split hierarchy, we need to scan /proc/mounts
25 * and inspect every cgroupfs mount point to find one that has
26 * perf_event subsystem
27 */
28 while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
29 STR(MAX_PATH)"s %*d %*d\n",
30 mountpoint, type, tokens) == 3) {
31
32 if (!strcmp(type, "cgroup")) {
33
34 token = strtok_r(tokens, ",", &saved_ptr);
35
36 while (token != NULL) {
37 if (!strcmp(token, "perf_event")) {
38 found = 1;
39 break;
40 }
41 token = strtok_r(NULL, ",", &saved_ptr);
42 }
43 }
44 if (found)
45 break;
46 }
47 fclose(fp);
48 if (!found)
49 return -1;
50
51 if (strlen(mountpoint) < maxlen) {
52 strcpy(buf, mountpoint);
53 return 0;
54 }
55 return -1;
56}
57
58static int open_cgroup(char *name)
59{
60 char path[MAX_PATH+1];
61 char mnt[MAX_PATH+1];
62 int fd;
63
64
65 if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1))
66 return -1;
67
68 snprintf(path, MAX_PATH, "%s/%s", mnt, name);
69
70 fd = open(path, O_RDONLY);
71 if (fd == -1)
72 fprintf(stderr, "no access to cgroup %s\n", path);
73
74 return fd;
75}
76
77static int add_cgroup(struct perf_evlist *evlist, char *str)
78{
79 struct perf_evsel *counter;
80 struct cgroup_sel *cgrp = NULL;
81 int n;
82 /*
83 * check if cgrp is already defined, if so we reuse it
84 */
85 list_for_each_entry(counter, &evlist->entries, node) {
86 cgrp = counter->cgrp;
87 if (!cgrp)
88 continue;
89 if (!strcmp(cgrp->name, str))
90 break;
91
92 cgrp = NULL;
93 }
94
95 if (!cgrp) {
96 cgrp = zalloc(sizeof(*cgrp));
97 if (!cgrp)
98 return -1;
99
100 cgrp->name = str;
101
102 cgrp->fd = open_cgroup(str);
103 if (cgrp->fd == -1) {
104 free(cgrp);
105 return -1;
106 }
107 }
108
109 /*
110 * find corresponding event
111 * if add cgroup N, then need to find event N
112 */
113 n = 0;
114 list_for_each_entry(counter, &evlist->entries, node) {
115 if (n == nr_cgroups)
116 goto found;
117 n++;
118 }
119 if (cgrp->refcnt == 0)
120 free(cgrp);
121
122 return -1;
123found:
124 cgrp->refcnt++;
125 counter->cgrp = cgrp;
126 return 0;
127}
128
129void close_cgroup(struct cgroup_sel *cgrp)
130{
131 if (!cgrp)
132 return;
133
134 /* XXX: not reentrant */
135 if (--cgrp->refcnt == 0) {
136 close(cgrp->fd);
137 free(cgrp->name);
138 free(cgrp);
139 }
140}
141
142int parse_cgroups(const struct option *opt __used, const char *str,
143 int unset __used)
144{
145 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
146 const char *p, *e, *eos = str + strlen(str);
147 char *s;
148 int ret;
149
150 if (list_empty(&evlist->entries)) {
151 fprintf(stderr, "must define events before cgroups\n");
152 return -1;
153 }
154
155 for (;;) {
156 p = strchr(str, ',');
157 e = p ? p : eos;
158
159 /* allow empty cgroups, i.e., skip */
160 if (e - str) {
161 /* termination added */
162 s = strndup(str, e - str);
163 if (!s)
164 return -1;
165 ret = add_cgroup(evlist, s);
166 if (ret) {
167 free(s);
168 return -1;
169 }
170 }
171 /* nr_cgroups is increased een for empty cgroups */
172 nr_cgroups++;
173 if (!p)
174 break;
175 str = p+1;
176 }
177 return 0;
178}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000000..89acd6debdc5
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,17 @@
1#ifndef __CGROUP_H__
2#define __CGROUP_H__
3
4struct option;
5
/* One cgroup selected for monitoring; may be shared by several events. */
struct cgroup_sel {
	char *name;	/* cgroup name; freed when the last reference is dropped */
	int fd;		/* descriptor of the opened cgroup directory */
	int refcnt;	/* number of events currently pointing at this entry */
};
11
12
13extern int nr_cgroups; /* number of explicit cgroups defined */
14extern void close_cgroup(struct cgroup_sel *cgrp);
15extern int parse_cgroups(const struct option *opt, const char *str, int unset);
16
17#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 211063eed474..c974e08d07ab 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -85,6 +85,7 @@ void perf_evsel__exit(struct perf_evsel *evsel)
85void perf_evsel__delete(struct perf_evsel *evsel) 85void perf_evsel__delete(struct perf_evsel *evsel)
86{ 86{
87 perf_evsel__exit(evsel); 87 perf_evsel__exit(evsel);
88 close_cgroup(evsel->cgrp);
88 free(evsel); 89 free(evsel);
89} 90}
90 91
@@ -163,21 +164,32 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
163 struct thread_map *threads, bool group, bool inherit) 164 struct thread_map *threads, bool group, bool inherit)
164{ 165{
165 int cpu, thread; 166 int cpu, thread;
167 unsigned long flags = 0;
168 int pid = -1;
166 169
167 if (evsel->fd == NULL && 170 if (evsel->fd == NULL &&
168 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 171 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
169 return -1; 172 return -1;
170 173
174 if (evsel->cgrp) {
175 flags = PERF_FLAG_PID_CGROUP;
176 pid = evsel->cgrp->fd;
177 }
178
171 for (cpu = 0; cpu < cpus->nr; cpu++) { 179 for (cpu = 0; cpu < cpus->nr; cpu++) {
172 int group_fd = -1; 180 int group_fd = -1;
173 181
174 evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit; 182 evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit;
175 183
176 for (thread = 0; thread < threads->nr; thread++) { 184 for (thread = 0; thread < threads->nr; thread++) {
185
186 if (!evsel->cgrp)
187 pid = threads->map[thread];
188
177 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 189 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
178 threads->map[thread], 190 pid,
179 cpus->map[cpu], 191 cpus->map[cpu],
180 group_fd, 0); 192 group_fd, flags);
181 if (FD(evsel, cpu, thread) < 0) 193 if (FD(evsel, cpu, thread) < 0)
182 goto out_close; 194 goto out_close;
183 195
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index eecdc3aabc14..1d3d5a3dbe60 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -6,6 +6,7 @@
6#include "../../../include/linux/perf_event.h" 6#include "../../../include/linux/perf_event.h"
7#include "types.h" 7#include "types.h"
8#include "xyarray.h" 8#include "xyarray.h"
9#include "cgroup.h"
9 10
10struct perf_counts_values { 11struct perf_counts_values {
11 union { 12 union {
@@ -45,6 +46,7 @@ struct perf_evsel {
45 struct perf_counts *counts; 46 struct perf_counts *counts;
46 int idx; 47 int idx;
47 void *priv; 48 void *priv;
49 struct cgroup_sel *cgrp;
48}; 50};
49 51
50struct cpu_map; 52struct cpu_map;