diff options
author | Zhang, Yanmin <yanmin_zhang@linux.intel.com> | 2010-03-18 10:36:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-03-18 11:21:12 -0400 |
commit | d6d901c23a9c4c7361aa901b5b2dda69703dd5e0 (patch) | |
tree | 601fc2cafac552c80b8456c8dd4b9964171552db /tools/perf/builtin-top.c | |
parent | 46be604b5ba738d53e5f5314813a4e7092864baf (diff) |
perf events: Change perf parameter --pid to process-wide collection instead of thread-wide
Parameter --pid (or -p) of perf currently means a thread-wide
collection. For example, if a process whose id is 8888 has 10
threads, 'perf top -p 8888' just collects the main thread
statistics. That's misleading. Users are used to attaching to a whole
process when debugging a process with gdb. To follow normal usage
style, the patch changes --pid to process-wide collection and adds
--tid (-t) to mean a thread-wide collection.
Usage example is:
# perf top -p 8888
# perf record -p 8888 -f sleep 10
# perf stat -p 8888 -f sleep 10
The above commands collect the statistics of all threads of process
8888.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: Zachary Amsden <zamsden@redhat.com>
LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-top.c')
-rw-r--r-- | tools/perf/builtin-top.c | 162 |
1 files changed, 105 insertions, 57 deletions
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 887ebbf5d1ff..5f3ac9ff354d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -55,7 +55,7 @@ | |||
55 | #include <linux/unistd.h> | 55 | #include <linux/unistd.h> |
56 | #include <linux/types.h> | 56 | #include <linux/types.h> |
57 | 57 | ||
58 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 58 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
59 | 59 | ||
60 | static int system_wide = 0; | 60 | static int system_wide = 0; |
61 | 61 | ||
@@ -65,6 +65,9 @@ static int count_filter = 5; | |||
65 | static int print_entries; | 65 | static int print_entries; |
66 | 66 | ||
67 | static int target_pid = -1; | 67 | static int target_pid = -1; |
68 | static int target_tid = -1; | ||
69 | static pid_t *all_tids = NULL; | ||
70 | static int thread_num = 0; | ||
68 | static int inherit = 0; | 71 | static int inherit = 0; |
69 | static int profile_cpu = -1; | 72 | static int profile_cpu = -1; |
70 | static int nr_cpus = 0; | 73 | static int nr_cpus = 0; |
@@ -524,13 +527,15 @@ static void print_sym_table(void) | |||
524 | 527 | ||
525 | if (target_pid != -1) | 528 | if (target_pid != -1) |
526 | printf(" (target_pid: %d", target_pid); | 529 | printf(" (target_pid: %d", target_pid); |
530 | else if (target_tid != -1) | ||
531 | printf(" (target_tid: %d", target_tid); | ||
527 | else | 532 | else |
528 | printf(" (all"); | 533 | printf(" (all"); |
529 | 534 | ||
530 | if (profile_cpu != -1) | 535 | if (profile_cpu != -1) |
531 | printf(", cpu: %d)\n", profile_cpu); | 536 | printf(", cpu: %d)\n", profile_cpu); |
532 | else { | 537 | else { |
533 | if (target_pid != -1) | 538 | if (target_tid != -1) |
534 | printf(")\n"); | 539 | printf(")\n"); |
535 | else | 540 | else |
536 | printf(", %d CPUs)\n", nr_cpus); | 541 | printf(", %d CPUs)\n", nr_cpus); |
@@ -1129,16 +1134,21 @@ static void perf_session__mmap_read_counter(struct perf_session *self, | |||
1129 | md->prev = old; | 1134 | md->prev = old; |
1130 | } | 1135 | } |
1131 | 1136 | ||
1132 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | 1137 | static struct pollfd *event_array; |
1133 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | 1138 | static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; |
1134 | 1139 | ||
1135 | static void perf_session__mmap_read(struct perf_session *self) | 1140 | static void perf_session__mmap_read(struct perf_session *self) |
1136 | { | 1141 | { |
1137 | int i, counter; | 1142 | int i, counter, thread_index; |
1138 | 1143 | ||
1139 | for (i = 0; i < nr_cpus; i++) { | 1144 | for (i = 0; i < nr_cpus; i++) { |
1140 | for (counter = 0; counter < nr_counters; counter++) | 1145 | for (counter = 0; counter < nr_counters; counter++) |
1141 | perf_session__mmap_read_counter(self, &mmap_array[i][counter]); | 1146 | for (thread_index = 0; |
1147 | thread_index < thread_num; | ||
1148 | thread_index++) { | ||
1149 | perf_session__mmap_read_counter(self, | ||
1150 | &mmap_array[i][counter][thread_index]); | ||
1151 | } | ||
1142 | } | 1152 | } |
1143 | } | 1153 | } |
1144 | 1154 | ||
@@ -1149,9 +1159,10 @@ static void start_counter(int i, int counter) | |||
1149 | { | 1159 | { |
1150 | struct perf_event_attr *attr; | 1160 | struct perf_event_attr *attr; |
1151 | int cpu; | 1161 | int cpu; |
1162 | int thread_index; | ||
1152 | 1163 | ||
1153 | cpu = profile_cpu; | 1164 | cpu = profile_cpu; |
1154 | if (target_pid == -1 && profile_cpu == -1) | 1165 | if (target_tid == -1 && profile_cpu == -1) |
1155 | cpu = cpumap[i]; | 1166 | cpu = cpumap[i]; |
1156 | 1167 | ||
1157 | attr = attrs + counter; | 1168 | attr = attrs + counter; |
@@ -1167,55 +1178,58 @@ static void start_counter(int i, int counter) | |||
1167 | attr->inherit = (cpu < 0) && inherit; | 1178 | attr->inherit = (cpu < 0) && inherit; |
1168 | attr->mmap = 1; | 1179 | attr->mmap = 1; |
1169 | 1180 | ||
1181 | for (thread_index = 0; thread_index < thread_num; thread_index++) { | ||
1170 | try_again: | 1182 | try_again: |
1171 | fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); | 1183 | fd[i][counter][thread_index] = sys_perf_event_open(attr, |
1172 | 1184 | all_tids[thread_index], cpu, group_fd, 0); | |
1173 | if (fd[i][counter] < 0) { | 1185 | |
1174 | int err = errno; | 1186 | if (fd[i][counter][thread_index] < 0) { |
1187 | int err = errno; | ||
1188 | |||
1189 | if (err == EPERM || err == EACCES) | ||
1190 | die("No permission - are you root?\n"); | ||
1191 | /* | ||
1192 | * If it's cycles then fall back to hrtimer | ||
1193 | * based cpu-clock-tick sw counter, which | ||
1194 | * is always available even if no PMU support: | ||
1195 | */ | ||
1196 | if (attr->type == PERF_TYPE_HARDWARE | ||
1197 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | ||
1198 | |||
1199 | if (verbose) | ||
1200 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | ||
1201 | |||
1202 | attr->type = PERF_TYPE_SOFTWARE; | ||
1203 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | ||
1204 | goto try_again; | ||
1205 | } | ||
1206 | printf("\n"); | ||
1207 | error("perfcounter syscall returned with %d (%s)\n", | ||
1208 | fd[i][counter][thread_index], strerror(err)); | ||
1209 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
1210 | exit(-1); | ||
1211 | } | ||
1212 | assert(fd[i][counter][thread_index] >= 0); | ||
1213 | fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); | ||
1175 | 1214 | ||
1176 | if (err == EPERM || err == EACCES) | ||
1177 | die("No permission - are you root?\n"); | ||
1178 | /* | 1215 | /* |
1179 | * If it's cycles then fall back to hrtimer | 1216 | * First counter acts as the group leader: |
1180 | * based cpu-clock-tick sw counter, which | ||
1181 | * is always available even if no PMU support: | ||
1182 | */ | 1217 | */ |
1183 | if (attr->type == PERF_TYPE_HARDWARE | 1218 | if (group && group_fd == -1) |
1184 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | 1219 | group_fd = fd[i][counter][thread_index]; |
1185 | 1220 | ||
1186 | if (verbose) | 1221 | event_array[nr_poll].fd = fd[i][counter][thread_index]; |
1187 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | 1222 | event_array[nr_poll].events = POLLIN; |
1188 | 1223 | nr_poll++; | |
1189 | attr->type = PERF_TYPE_SOFTWARE; | 1224 | |
1190 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | 1225 | mmap_array[i][counter][thread_index].counter = counter; |
1191 | goto try_again; | 1226 | mmap_array[i][counter][thread_index].prev = 0; |
1192 | } | 1227 | mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; |
1193 | printf("\n"); | 1228 | mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, |
1194 | error("perfcounter syscall returned with %d (%s)\n", | 1229 | PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); |
1195 | fd[i][counter], strerror(err)); | 1230 | if (mmap_array[i][counter][thread_index].base == MAP_FAILED) |
1196 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | 1231 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); |
1197 | exit(-1); | ||
1198 | } | 1232 | } |
1199 | assert(fd[i][counter] >= 0); | ||
1200 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
1201 | |||
1202 | /* | ||
1203 | * First counter acts as the group leader: | ||
1204 | */ | ||
1205 | if (group && group_fd == -1) | ||
1206 | group_fd = fd[i][counter]; | ||
1207 | |||
1208 | event_array[nr_poll].fd = fd[i][counter]; | ||
1209 | event_array[nr_poll].events = POLLIN; | ||
1210 | nr_poll++; | ||
1211 | |||
1212 | mmap_array[i][counter].counter = counter; | ||
1213 | mmap_array[i][counter].prev = 0; | ||
1214 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; | ||
1215 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
1216 | PROT_READ, MAP_SHARED, fd[i][counter], 0); | ||
1217 | if (mmap_array[i][counter].base == MAP_FAILED) | ||
1218 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
1219 | } | 1233 | } |
1220 | 1234 | ||
1221 | static int __cmd_top(void) | 1235 | static int __cmd_top(void) |
@@ -1231,8 +1245,8 @@ static int __cmd_top(void) | |||
1231 | if (session == NULL) | 1245 | if (session == NULL) |
1232 | return -ENOMEM; | 1246 | return -ENOMEM; |
1233 | 1247 | ||
1234 | if (target_pid != -1) | 1248 | if (target_tid != -1) |
1235 | event__synthesize_thread(target_pid, event__process, session); | 1249 | event__synthesize_thread(target_tid, event__process, session); |
1236 | else | 1250 | else |
1237 | event__synthesize_threads(event__process, session); | 1251 | event__synthesize_threads(event__process, session); |
1238 | 1252 | ||
@@ -1243,7 +1257,7 @@ static int __cmd_top(void) | |||
1243 | } | 1257 | } |
1244 | 1258 | ||
1245 | /* Wait for a minimal set of events before starting the snapshot */ | 1259 | /* Wait for a minimal set of events before starting the snapshot */ |
1246 | poll(event_array, nr_poll, 100); | 1260 | poll(&event_array[0], nr_poll, 100); |
1247 | 1261 | ||
1248 | perf_session__mmap_read(session); | 1262 | perf_session__mmap_read(session); |
1249 | 1263 | ||
@@ -1286,7 +1300,9 @@ static const struct option options[] = { | |||
1286 | OPT_INTEGER('c', "count", &default_interval, | 1300 | OPT_INTEGER('c', "count", &default_interval, |
1287 | "event period to sample"), | 1301 | "event period to sample"), |
1288 | OPT_INTEGER('p', "pid", &target_pid, | 1302 | OPT_INTEGER('p', "pid", &target_pid, |
1289 | "profile events on existing pid"), | 1303 | "profile events on existing process id"), |
1304 | OPT_INTEGER('t', "tid", &target_tid, | ||
1305 | "profile events on existing thread id"), | ||
1290 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1306 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1291 | "system-wide collection from all CPUs"), | 1307 | "system-wide collection from all CPUs"), |
1292 | OPT_INTEGER('C', "CPU", &profile_cpu, | 1308 | OPT_INTEGER('C', "CPU", &profile_cpu, |
@@ -1327,6 +1343,7 @@ static const struct option options[] = { | |||
1327 | int cmd_top(int argc, const char **argv, const char *prefix __used) | 1343 | int cmd_top(int argc, const char **argv, const char *prefix __used) |
1328 | { | 1344 | { |
1329 | int counter; | 1345 | int counter; |
1346 | int i,j; | ||
1330 | 1347 | ||
1331 | page_size = sysconf(_SC_PAGE_SIZE); | 1348 | page_size = sysconf(_SC_PAGE_SIZE); |
1332 | 1349 | ||
@@ -1334,8 +1351,39 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1334 | if (argc) | 1351 | if (argc) |
1335 | usage_with_options(top_usage, options); | 1352 | usage_with_options(top_usage, options); |
1336 | 1353 | ||
1354 | if (target_pid != -1) { | ||
1355 | target_tid = target_pid; | ||
1356 | thread_num = find_all_tid(target_pid, &all_tids); | ||
1357 | if (thread_num <= 0) { | ||
1358 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
1359 | target_pid); | ||
1360 | usage_with_options(top_usage, options); | ||
1361 | } | ||
1362 | } else { | ||
1363 | all_tids=malloc(sizeof(pid_t)); | ||
1364 | if (!all_tids) | ||
1365 | return -ENOMEM; | ||
1366 | |||
1367 | all_tids[0] = target_tid; | ||
1368 | thread_num = 1; | ||
1369 | } | ||
1370 | |||
1371 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
1372 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
1373 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
1374 | mmap_array[i][j] = malloc( | ||
1375 | sizeof(struct mmap_data)*thread_num); | ||
1376 | if (!fd[i][j] || !mmap_array[i][j]) | ||
1377 | return -ENOMEM; | ||
1378 | } | ||
1379 | } | ||
1380 | event_array = malloc( | ||
1381 | sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); | ||
1382 | if (!event_array) | ||
1383 | return -ENOMEM; | ||
1384 | |||
1337 | /* CPU and PID are mutually exclusive */ | 1385 | /* CPU and PID are mutually exclusive */ |
1338 | if (target_pid != -1 && profile_cpu != -1) { | 1386 | if (target_tid > 0 && profile_cpu != -1) { |
1339 | printf("WARNING: PID switch overriding CPU\n"); | 1387 | printf("WARNING: PID switch overriding CPU\n"); |
1340 | sleep(1); | 1388 | sleep(1); |
1341 | profile_cpu = -1; | 1389 | profile_cpu = -1; |
@@ -1376,7 +1424,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1376 | attrs[counter].sample_period = default_interval; | 1424 | attrs[counter].sample_period = default_interval; |
1377 | } | 1425 | } |
1378 | 1426 | ||
1379 | if (target_pid != -1 || profile_cpu != -1) | 1427 | if (target_tid != -1 || profile_cpu != -1) |
1380 | nr_cpus = 1; | 1428 | nr_cpus = 1; |
1381 | else | 1429 | else |
1382 | nr_cpus = read_cpu_map(); | 1430 | nr_cpus = read_cpu_map(); |