diff options
Diffstat (limited to 'tools/power')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 99 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 245 |
2 files changed, 224 insertions, 120 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 555c69a5592a..adf175f61496 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -4,11 +4,13 @@ turbostat \- Report processor frequency and idle statistics | |||
4 | .SH SYNOPSIS | 4 | .SH SYNOPSIS |
5 | .ft B | 5 | .ft B |
6 | .B turbostat | 6 | .B turbostat |
7 | .RB [ "\-s" ] | ||
7 | .RB [ "\-v" ] | 8 | .RB [ "\-v" ] |
8 | .RB [ "\-M MSR#" ] | 9 | .RB [ "\-M MSR#" ] |
9 | .RB command | 10 | .RB command |
10 | .br | 11 | .br |
11 | .B turbostat | 12 | .B turbostat |
13 | .RB [ "\-s" ] | ||
12 | .RB [ "\-v" ] | 14 | .RB [ "\-v" ] |
13 | .RB [ "\-M MSR#" ] | 15 | .RB [ "\-M MSR#" ] |
14 | .RB [ "\-i interval_sec" ] | 16 | .RB [ "\-i interval_sec" ] |
@@ -25,6 +27,8 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. | |||
25 | on processors that additionally support C-state residency counters. | 27 | on processors that additionally support C-state residency counters. |
26 | 28 | ||
27 | .SS Options | 29 | .SS Options |
30 | The \fB-s\fP option prints only a 1-line summary for each sample interval. | ||
31 | .PP | ||
28 | The \fB-v\fP option increases verbosity. | 32 | The \fB-v\fP option increases verbosity. |
29 | .PP | 33 | .PP |
30 | The \fB-M MSR#\fP option dumps the specified MSR, | 34 | The \fB-M MSR#\fP option dumps the specified MSR, |
@@ -39,13 +43,14 @@ displays the statistics gathered since it was forked. | |||
39 | .SH FIELD DESCRIPTIONS | 43 | .SH FIELD DESCRIPTIONS |
40 | .nf | 44 | .nf |
41 | \fBpk\fP processor package number. | 45 | \fBpk\fP processor package number. |
42 | \fBcr\fP processor core number. | 46 | \fBcor\fP processor core number. |
43 | \fBCPU\fP Linux CPU (logical processor) number. | 47 | \fBCPU\fP Linux CPU (logical processor) number. |
48 | Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. | ||
44 | \fB%c0\fP percent of the interval that the CPU retired instructions. | 49 | \fB%c0\fP percent of the interval that the CPU retired instructions. |
45 | \fBGHz\fP average clock rate while the CPU was in c0 state. | 50 | \fBGHz\fP average clock rate while the CPU was in c0 state. |
46 | \fBTSC\fP average GHz that the TSC ran during the entire interval. | 51 | \fBTSC\fP average GHz that the TSC ran during the entire interval. |
47 | \fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. | 52 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. |
48 | \fB%pc3, %pc6\fP percentage residency in hardware package idle states. | 53 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. |
49 | .fi | 54 | .fi |
50 | .PP | 55 | .PP |
51 | .SH EXAMPLE | 56 | .SH EXAMPLE |
@@ -53,25 +58,37 @@ Without any parameters, turbostat prints out counters every 5 seconds. | |||
53 | (override interval with "-i sec" option, or specify a command | 58 | (override interval with "-i sec" option, or specify a command |
54 | for turbostat to fork). | 59 | for turbostat to fork). |
55 | 60 | ||
56 | The first row of statistics reflect the average for the entire system. | 61 | The first row of statistics is a summary for the entire system. |
62 | Note that the summary is a weighted average. | ||
57 | Subsequent rows show per-CPU statistics. | 63 | Subsequent rows show per-CPU statistics. |
58 | 64 | ||
59 | .nf | 65 | .nf |
60 | [root@x980]# ./turbostat | 66 | [root@x980]# ./turbostat |
61 | cr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 67 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
62 | 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 | 68 | 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 |
63 | 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 | 69 | 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 |
64 | 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 | 70 | 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 |
65 | 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 | 71 | 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 |
66 | 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 | 72 | 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 |
67 | 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 | 73 | 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 |
68 | 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 | 74 | 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 |
69 | 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 | 75 | 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 |
70 | 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 | 76 | 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 |
71 | 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 | 77 | 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 |
72 | 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 | 78 | 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 |
73 | 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 | 79 | 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 |
74 | 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 | 80 | 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 |
81 | .fi | ||
82 | .SH SUMMARY EXAMPLE | ||
83 | The "-s" option prints the column headers just once, | ||
84 | and then the one-line system summary for each sample interval. | ||
85 | |||
86 | .nf | ||
87 | [root@x980]# ./turbostat -s | ||
88 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | ||
89 | 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 | ||
90 | 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 | ||
91 | 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 | ||
75 | .fi | 92 | .fi |
76 | .SH VERBOSE EXAMPLE | 93 | .SH VERBOSE EXAMPLE |
77 | The "-v" option adds verbosity to the output: | 94 | The "-v" option adds verbosity to the output: |
@@ -101,33 +118,33 @@ until ^C while the other CPUs are mostly idle: | |||
101 | 118 | ||
102 | .nf | 119 | .nf |
103 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null | 120 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null |
104 | 121 | ^C | |
105 | ^Ccr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 122 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
106 | 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 | 123 | 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 |
107 | 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 | 124 | 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 |
108 | 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 | 125 | 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 |
109 | 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 | 126 | 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 |
110 | 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 | 127 | 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 |
111 | 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 | 128 | 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 |
112 | 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 | 129 | 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 |
113 | 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 | 130 | 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 |
114 | 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 | 131 | 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 |
115 | 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 | 132 | 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 |
116 | 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 | 133 | 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 |
117 | 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 | 134 | 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 |
118 | 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 | 135 | 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 |
119 | 6.950866 sec | 136 | 4.907015 sec |
120 | 137 | ||
121 | .fi | 138 | .fi |
122 | Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit | 139 | Above the cycle soaker drives cpu6 up to the 3.6 GHz turbo limit |
123 | while the other processors are generally in various states of idle. | 140 | while the other processors are generally in various states of idle. |
124 | 141 | ||
125 | Note that cpu3 is an HT sibling sharing core9 | 142 | Note that cpu0 is an HT sibling sharing core0 |
126 | with cpu9, and thus it is unable to get to an idle state | 143 | with cpu6, and thus it is unable to get to an idle state |
127 | deeper than c1 while cpu9 is busy. | 144 | deeper than c1 while cpu6 is busy. |
128 | 145 | ||
129 | Note that turbostat reports average GHz of 3.61, while | 146 | Note that turbostat reports average GHz of 3.64, while |
130 | the arithmetic average of the GHz column above is 3.24. | 147 | the arithmetic average of the GHz column above is lower. |
131 | This is a weighted average, where the weight is %c0; i.e., it is the total number of | 148 | This is a weighted average, where the weight is %c0; i.e., it is the total number of |
132 | un-halted cycles elapsed per time divided by the number of CPUs. | 149 | un-halted cycles elapsed per time divided by the number of CPUs. |
133 | .SH NOTES | 150 | .SH NOTES |
@@ -167,6 +184,6 @@ http://www.intel.com/products/processor/manuals/ | |||
167 | .SH "SEE ALSO" | 184 | .SH "SEE ALSO" |
168 | msr(4), vmstat(8) | 185 | msr(4), vmstat(8) |
169 | .PP | 186 | .PP |
170 | .SH AUTHORS | 187 | .SH AUTHOR |
171 | .nf | 188 | .nf |
172 | Written by Len Brown <len.brown@intel.com> | 189 | Written by Len Brown <len.brown@intel.com> |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 310d3dd5e547..ab2f682fd44c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * turbostat -- show CPU frequency and C-state residency | 2 | * turbostat -- show CPU frequency and C-state residency |
3 | * on modern Intel turbo-capable processors. | 3 | * on modern Intel turbo-capable processors. |
4 | * | 4 | * |
5 | * Copyright (c) 2010, Intel Corporation. | 5 | * Copyright (c) 2012 Intel Corporation. |
6 | * Len Brown <len.brown@intel.com> | 6 | * Len Brown <len.brown@intel.com> |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
@@ -19,6 +19,7 @@ | |||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | 19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #define _GNU_SOURCE | ||
22 | #include <stdio.h> | 23 | #include <stdio.h> |
23 | #include <unistd.h> | 24 | #include <unistd.h> |
24 | #include <sys/types.h> | 25 | #include <sys/types.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include <dirent.h> | 33 | #include <dirent.h> |
33 | #include <string.h> | 34 | #include <string.h> |
34 | #include <ctype.h> | 35 | #include <ctype.h> |
36 | #include <sched.h> | ||
35 | 37 | ||
36 | #define MSR_TSC 0x10 | 38 | #define MSR_TSC 0x10 |
37 | #define MSR_NEHALEM_PLATFORM_INFO 0xCE | 39 | #define MSR_NEHALEM_PLATFORM_INFO 0xCE |
@@ -49,6 +51,7 @@ | |||
49 | char *proc_stat = "/proc/stat"; | 51 | char *proc_stat = "/proc/stat"; |
50 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 52 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
51 | unsigned int verbose; /* set with -v */ | 53 | unsigned int verbose; /* set with -v */ |
54 | unsigned int summary_only; /* set with -s */ | ||
52 | unsigned int skip_c0; | 55 | unsigned int skip_c0; |
53 | unsigned int skip_c1; | 56 | unsigned int skip_c1; |
54 | unsigned int do_nhm_cstates; | 57 | unsigned int do_nhm_cstates; |
@@ -68,9 +71,10 @@ unsigned int show_cpu; | |||
68 | int aperf_mperf_unstable; | 71 | int aperf_mperf_unstable; |
69 | int backwards_count; | 72 | int backwards_count; |
70 | char *progname; | 73 | char *progname; |
71 | int need_reinitialize; | ||
72 | 74 | ||
73 | int num_cpus; | 75 | int num_cpus; |
76 | cpu_set_t *cpu_mask; | ||
77 | size_t cpu_mask_size; | ||
74 | 78 | ||
75 | struct counters { | 79 | struct counters { |
76 | unsigned long long tsc; /* per thread */ | 80 | unsigned long long tsc; /* per thread */ |
@@ -99,44 +103,76 @@ struct timeval tv_even; | |||
99 | struct timeval tv_odd; | 103 | struct timeval tv_odd; |
100 | struct timeval tv_delta; | 104 | struct timeval tv_delta; |
101 | 105 | ||
102 | unsigned long long get_msr(int cpu, off_t offset) | 106 | /* |
107 | * cpu_mask_init(ncpus) | ||
108 | * | ||
109 | * allocate and clear cpu_mask | ||
110 | * set cpu_mask_size | ||
111 | */ | ||
112 | void cpu_mask_init(int ncpus) | ||
113 | { | ||
114 | cpu_mask = CPU_ALLOC(ncpus); | ||
115 | if (cpu_mask == NULL) { | ||
116 | perror("CPU_ALLOC"); | ||
117 | exit(3); | ||
118 | } | ||
119 | cpu_mask_size = CPU_ALLOC_SIZE(ncpus); | ||
120 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
121 | } | ||
122 | |||
123 | void cpu_mask_uninit() | ||
124 | { | ||
125 | CPU_FREE(cpu_mask); | ||
126 | cpu_mask = NULL; | ||
127 | cpu_mask_size = 0; | ||
128 | } | ||
129 | |||
130 | int cpu_migrate(int cpu) | ||
131 | { | ||
132 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
133 | CPU_SET_S(cpu, cpu_mask_size, cpu_mask); | ||
134 | if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) | ||
135 | return -1; | ||
136 | else | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | int get_msr(int cpu, off_t offset, unsigned long long *msr) | ||
103 | { | 141 | { |
104 | ssize_t retval; | 142 | ssize_t retval; |
105 | unsigned long long msr; | ||
106 | char pathname[32]; | 143 | char pathname[32]; |
107 | int fd; | 144 | int fd; |
108 | 145 | ||
109 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 146 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
110 | fd = open(pathname, O_RDONLY); | 147 | fd = open(pathname, O_RDONLY); |
111 | if (fd < 0) { | 148 | if (fd < 0) |
112 | perror(pathname); | 149 | return -1; |
113 | need_reinitialize = 1; | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | retval = pread(fd, &msr, sizeof msr, offset); | ||
118 | if (retval != sizeof msr) { | ||
119 | fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", | ||
120 | cpu, offset, retval); | ||
121 | exit(-2); | ||
122 | } | ||
123 | 150 | ||
151 | retval = pread(fd, msr, sizeof *msr, offset); | ||
124 | close(fd); | 152 | close(fd); |
125 | return msr; | 153 | |
154 | if (retval != sizeof *msr) | ||
155 | return -1; | ||
156 | |||
157 | return 0; | ||
126 | } | 158 | } |
127 | 159 | ||
128 | void print_header(void) | 160 | void print_header(void) |
129 | { | 161 | { |
130 | if (show_pkg) | 162 | if (show_pkg) |
131 | fprintf(stderr, "pk"); | 163 | fprintf(stderr, "pk"); |
164 | if (show_pkg) | ||
165 | fprintf(stderr, " "); | ||
132 | if (show_core) | 166 | if (show_core) |
133 | fprintf(stderr, " cr"); | 167 | fprintf(stderr, "cor"); |
134 | if (show_cpu) | 168 | if (show_cpu) |
135 | fprintf(stderr, " CPU"); | 169 | fprintf(stderr, " CPU"); |
170 | if (show_pkg || show_core || show_cpu) | ||
171 | fprintf(stderr, " "); | ||
136 | if (do_nhm_cstates) | 172 | if (do_nhm_cstates) |
137 | fprintf(stderr, " %%c0 "); | 173 | fprintf(stderr, " %%c0"); |
138 | if (has_aperf) | 174 | if (has_aperf) |
139 | fprintf(stderr, " GHz"); | 175 | fprintf(stderr, " GHz"); |
140 | fprintf(stderr, " TSC"); | 176 | fprintf(stderr, " TSC"); |
141 | if (do_nhm_cstates) | 177 | if (do_nhm_cstates) |
142 | fprintf(stderr, " %%c1"); | 178 | fprintf(stderr, " %%c1"); |
@@ -147,13 +183,13 @@ void print_header(void) | |||
147 | if (do_snb_cstates) | 183 | if (do_snb_cstates) |
148 | fprintf(stderr, " %%c7"); | 184 | fprintf(stderr, " %%c7"); |
149 | if (do_snb_cstates) | 185 | if (do_snb_cstates) |
150 | fprintf(stderr, " %%pc2"); | 186 | fprintf(stderr, " %%pc2"); |
151 | if (do_nhm_cstates) | 187 | if (do_nhm_cstates) |
152 | fprintf(stderr, " %%pc3"); | 188 | fprintf(stderr, " %%pc3"); |
153 | if (do_nhm_cstates) | 189 | if (do_nhm_cstates) |
154 | fprintf(stderr, " %%pc6"); | 190 | fprintf(stderr, " %%pc6"); |
155 | if (do_snb_cstates) | 191 | if (do_snb_cstates) |
156 | fprintf(stderr, " %%pc7"); | 192 | fprintf(stderr, " %%pc7"); |
157 | if (extra_msr_offset) | 193 | if (extra_msr_offset) |
158 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); | 194 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); |
159 | 195 | ||
@@ -187,6 +223,15 @@ void dump_list(struct counters *cnt) | |||
187 | dump_cnt(cnt); | 223 | dump_cnt(cnt); |
188 | } | 224 | } |
189 | 225 | ||
226 | /* | ||
227 | * column formatting convention & formats | ||
228 | * package: "pk" 2 columns %2d | ||
229 | * core: "cor" 3 columns %3d | ||
230 | * CPU: "CPU" 3 columns %3d | ||
231 | * GHz: "GHz" 3 columns %3.2f | ||
232 | * TSC: "TSC" 3 columns %3.2f | ||
233 | * percentage " %pc3" %6.2f | ||
234 | */ | ||
190 | void print_cnt(struct counters *p) | 235 | void print_cnt(struct counters *p) |
191 | { | 236 | { |
192 | double interval_float; | 237 | double interval_float; |
@@ -196,39 +241,45 @@ void print_cnt(struct counters *p) | |||
196 | /* topology columns, print blanks on 1st (average) line */ | 241 | /* topology columns, print blanks on 1st (average) line */ |
197 | if (p == cnt_average) { | 242 | if (p == cnt_average) { |
198 | if (show_pkg) | 243 | if (show_pkg) |
244 | fprintf(stderr, " "); | ||
245 | if (show_pkg && show_core) | ||
199 | fprintf(stderr, " "); | 246 | fprintf(stderr, " "); |
200 | if (show_core) | 247 | if (show_core) |
201 | fprintf(stderr, " "); | 248 | fprintf(stderr, " "); |
202 | if (show_cpu) | 249 | if (show_cpu) |
203 | fprintf(stderr, " "); | 250 | fprintf(stderr, " " " "); |
204 | } else { | 251 | } else { |
205 | if (show_pkg) | 252 | if (show_pkg) |
206 | fprintf(stderr, "%d", p->pkg); | 253 | fprintf(stderr, "%2d", p->pkg); |
254 | if (show_pkg && show_core) | ||
255 | fprintf(stderr, " "); | ||
207 | if (show_core) | 256 | if (show_core) |
208 | fprintf(stderr, "%4d", p->core); | 257 | fprintf(stderr, "%3d", p->core); |
209 | if (show_cpu) | 258 | if (show_cpu) |
210 | fprintf(stderr, "%4d", p->cpu); | 259 | fprintf(stderr, " %3d", p->cpu); |
211 | } | 260 | } |
212 | 261 | ||
213 | /* %c0 */ | 262 | /* %c0 */ |
214 | if (do_nhm_cstates) { | 263 | if (do_nhm_cstates) { |
264 | if (show_pkg || show_core || show_cpu) | ||
265 | fprintf(stderr, " "); | ||
215 | if (!skip_c0) | 266 | if (!skip_c0) |
216 | fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); | 267 | fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); |
217 | else | 268 | else |
218 | fprintf(stderr, " ****"); | 269 | fprintf(stderr, " ****"); |
219 | } | 270 | } |
220 | 271 | ||
221 | /* GHz */ | 272 | /* GHz */ |
222 | if (has_aperf) { | 273 | if (has_aperf) { |
223 | if (!aperf_mperf_unstable) { | 274 | if (!aperf_mperf_unstable) { |
224 | fprintf(stderr, "%5.2f", | 275 | fprintf(stderr, " %3.2f", |
225 | 1.0 * p->tsc / units * p->aperf / | 276 | 1.0 * p->tsc / units * p->aperf / |
226 | p->mperf / interval_float); | 277 | p->mperf / interval_float); |
227 | } else { | 278 | } else { |
228 | if (p->aperf > p->tsc || p->mperf > p->tsc) { | 279 | if (p->aperf > p->tsc || p->mperf > p->tsc) { |
229 | fprintf(stderr, " ****"); | 280 | fprintf(stderr, " ***"); |
230 | } else { | 281 | } else { |
231 | fprintf(stderr, "%4.1f*", | 282 | fprintf(stderr, "%3.1f*", |
232 | 1.0 * p->tsc / | 283 | 1.0 * p->tsc / |
233 | units * p->aperf / | 284 | units * p->aperf / |
234 | p->mperf / interval_float); | 285 | p->mperf / interval_float); |
@@ -241,7 +292,7 @@ void print_cnt(struct counters *p) | |||
241 | 292 | ||
242 | if (do_nhm_cstates) { | 293 | if (do_nhm_cstates) { |
243 | if (!skip_c1) | 294 | if (!skip_c1) |
244 | fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); | 295 | fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); |
245 | else | 296 | else |
246 | fprintf(stderr, " ****"); | 297 | fprintf(stderr, " ****"); |
247 | } | 298 | } |
@@ -252,13 +303,13 @@ void print_cnt(struct counters *p) | |||
252 | if (do_snb_cstates) | 303 | if (do_snb_cstates) |
253 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); | 304 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); |
254 | if (do_snb_cstates) | 305 | if (do_snb_cstates) |
255 | fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); | 306 | fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); |
256 | if (do_nhm_cstates) | 307 | if (do_nhm_cstates) |
257 | fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); | 308 | fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); |
258 | if (do_nhm_cstates) | 309 | if (do_nhm_cstates) |
259 | fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); | 310 | fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); |
260 | if (do_snb_cstates) | 311 | if (do_snb_cstates) |
261 | fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); | 312 | fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); |
262 | if (extra_msr_offset) | 313 | if (extra_msr_offset) |
263 | fprintf(stderr, " 0x%016llx", p->extra_msr); | 314 | fprintf(stderr, " 0x%016llx", p->extra_msr); |
264 | putc('\n', stderr); | 315 | putc('\n', stderr); |
@@ -267,12 +318,20 @@ void print_cnt(struct counters *p) | |||
267 | void print_counters(struct counters *counters) | 318 | void print_counters(struct counters *counters) |
268 | { | 319 | { |
269 | struct counters *cnt; | 320 | struct counters *cnt; |
321 | static int printed; | ||
322 | |||
270 | 323 | ||
271 | print_header(); | 324 | if (!printed || !summary_only) |
325 | print_header(); | ||
272 | 326 | ||
273 | if (num_cpus > 1) | 327 | if (num_cpus > 1) |
274 | print_cnt(cnt_average); | 328 | print_cnt(cnt_average); |
275 | 329 | ||
330 | printed = 1; | ||
331 | |||
332 | if (summary_only) | ||
333 | return; | ||
334 | |||
276 | for (cnt = counters; cnt != NULL; cnt = cnt->next) | 335 | for (cnt = counters; cnt != NULL; cnt = cnt->next) |
277 | print_cnt(cnt); | 336 | print_cnt(cnt); |
278 | 337 | ||
@@ -440,31 +499,51 @@ void compute_average(struct counters *delta, struct counters *avg) | |||
440 | free(sum); | 499 | free(sum); |
441 | } | 500 | } |
442 | 501 | ||
443 | void get_counters(struct counters *cnt) | 502 | int get_counters(struct counters *cnt) |
444 | { | 503 | { |
445 | for ( ; cnt; cnt = cnt->next) { | 504 | for ( ; cnt; cnt = cnt->next) { |
446 | cnt->tsc = get_msr(cnt->cpu, MSR_TSC); | 505 | |
447 | if (do_nhm_cstates) | 506 | if (cpu_migrate(cnt->cpu)) |
448 | cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); | 507 | return -1; |
449 | if (do_nhm_cstates) | 508 | |
450 | cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY); | 509 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) |
451 | if (do_snb_cstates) | 510 | return -1; |
452 | cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY); | 511 | |
453 | if (has_aperf) | 512 | if (has_aperf) { |
454 | cnt->aperf = get_msr(cnt->cpu, MSR_APERF); | 513 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) |
455 | if (has_aperf) | 514 | return -1; |
456 | cnt->mperf = get_msr(cnt->cpu, MSR_MPERF); | 515 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) |
457 | if (do_snb_cstates) | 516 | return -1; |
458 | cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY); | 517 | } |
459 | if (do_nhm_cstates) | 518 | |
460 | cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY); | 519 | if (do_nhm_cstates) { |
461 | if (do_nhm_cstates) | 520 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) |
462 | cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY); | 521 | return -1; |
522 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | ||
523 | return -1; | ||
524 | } | ||
525 | |||
463 | if (do_snb_cstates) | 526 | if (do_snb_cstates) |
464 | cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY); | 527 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) |
528 | return -1; | ||
529 | |||
530 | if (do_nhm_cstates) { | ||
531 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | ||
532 | return -1; | ||
533 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | ||
534 | return -1; | ||
535 | } | ||
536 | if (do_snb_cstates) { | ||
537 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | ||
538 | return -1; | ||
539 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | ||
540 | return -1; | ||
541 | } | ||
465 | if (extra_msr_offset) | 542 | if (extra_msr_offset) |
466 | cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset); | 543 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) |
544 | return -1; | ||
467 | } | 545 | } |
546 | return 0; | ||
468 | } | 547 | } |
469 | 548 | ||
470 | void print_nehalem_info(void) | 549 | void print_nehalem_info(void) |
@@ -475,7 +554,7 @@ void print_nehalem_info(void) | |||
475 | if (!do_nehalem_platform_info) | 554 | if (!do_nehalem_platform_info) |
476 | return; | 555 | return; |
477 | 556 | ||
478 | msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); | 557 | get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); |
479 | 558 | ||
480 | ratio = (msr >> 40) & 0xFF; | 559 | ratio = (msr >> 40) & 0xFF; |
481 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 560 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", |
@@ -491,7 +570,7 @@ void print_nehalem_info(void) | |||
491 | if (!do_nehalem_turbo_ratio_limit) | 570 | if (!do_nehalem_turbo_ratio_limit) |
492 | return; | 571 | return; |
493 | 572 | ||
494 | msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); | 573 | get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); |
495 | 574 | ||
496 | ratio = (msr >> 24) & 0xFF; | 575 | ratio = (msr >> 24) & 0xFF; |
497 | if (ratio) | 576 | if (ratio) |
@@ -557,7 +636,8 @@ void insert_counters(struct counters **list, | |||
557 | return; | 636 | return; |
558 | } | 637 | } |
559 | 638 | ||
560 | show_cpu = 1; /* there is more than one CPU */ | 639 | if (!summary_only) |
640 | show_cpu = 1; /* there is more than one CPU */ | ||
561 | 641 | ||
562 | /* | 642 | /* |
563 | * insert on front of list. | 643 | * insert on front of list. |
@@ -575,13 +655,15 @@ void insert_counters(struct counters **list, | |||
575 | 655 | ||
576 | while (prev->next && (prev->next->pkg < new->pkg)) { | 656 | while (prev->next && (prev->next->pkg < new->pkg)) { |
577 | prev = prev->next; | 657 | prev = prev->next; |
578 | show_pkg = 1; /* there is more than 1 package */ | 658 | if (!summary_only) |
659 | show_pkg = 1; /* there is more than 1 package */ | ||
579 | } | 660 | } |
580 | 661 | ||
581 | while (prev->next && (prev->next->pkg == new->pkg) | 662 | while (prev->next && (prev->next->pkg == new->pkg) |
582 | && (prev->next->core < new->core)) { | 663 | && (prev->next->core < new->core)) { |
583 | prev = prev->next; | 664 | prev = prev->next; |
584 | show_core = 1; /* there is more than 1 core */ | 665 | if (!summary_only) |
666 | show_core = 1; /* there is more than 1 core */ | ||
585 | } | 667 | } |
586 | 668 | ||
587 | while (prev->next && (prev->next->pkg == new->pkg) | 669 | while (prev->next && (prev->next->pkg == new->pkg) |
@@ -681,7 +763,7 @@ int get_core_id(int cpu) | |||
681 | } | 763 | } |
682 | 764 | ||
683 | /* | 765 | /* |
684 | * run func(index, cpu) on every cpu in /proc/stat | 766 | * run func(pkg, core, cpu) on every cpu in /proc/stat |
685 | */ | 767 | */ |
686 | 768 | ||
687 | int for_all_cpus(void (func)(int, int, int)) | 769 | int for_all_cpus(void (func)(int, int, int)) |
@@ -717,18 +799,18 @@ int for_all_cpus(void (func)(int, int, int)) | |||
717 | 799 | ||
718 | void re_initialize(void) | 800 | void re_initialize(void) |
719 | { | 801 | { |
720 | printf("turbostat: topology changed, re-initializing.\n"); | ||
721 | free_all_counters(); | 802 | free_all_counters(); |
722 | num_cpus = for_all_cpus(alloc_new_counters); | 803 | num_cpus = for_all_cpus(alloc_new_counters); |
723 | need_reinitialize = 0; | 804 | cpu_mask_uninit(); |
724 | printf("num_cpus is now %d\n", num_cpus); | 805 | cpu_mask_init(num_cpus); |
806 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); | ||
725 | } | 807 | } |
726 | 808 | ||
727 | void dummy(int pkg, int core, int cpu) { return; } | 809 | void dummy(int pkg, int core, int cpu) { return; } |
728 | /* | 810 | /* |
729 | * check to see if a cpu came on-line | 811 | * check to see if a cpu came on-line |
730 | */ | 812 | */ |
731 | void verify_num_cpus(void) | 813 | int verify_num_cpus(void) |
732 | { | 814 | { |
733 | int new_num_cpus; | 815 | int new_num_cpus; |
734 | 816 | ||
@@ -738,8 +820,9 @@ void verify_num_cpus(void) | |||
738 | if (verbose) | 820 | if (verbose) |
739 | printf("num_cpus was %d, is now %d\n", | 821 | printf("num_cpus was %d, is now %d\n", |
740 | num_cpus, new_num_cpus); | 822 | num_cpus, new_num_cpus); |
741 | need_reinitialize = 1; | 823 | return -1; |
742 | } | 824 | } |
825 | return 0; | ||
743 | } | 826 | } |
744 | 827 | ||
745 | void turbostat_loop() | 828 | void turbostat_loop() |
@@ -749,25 +832,25 @@ restart: | |||
749 | gettimeofday(&tv_even, (struct timezone *)NULL); | 832 | gettimeofday(&tv_even, (struct timezone *)NULL); |
750 | 833 | ||
751 | while (1) { | 834 | while (1) { |
752 | verify_num_cpus(); | 835 | if (verify_num_cpus()) { |
753 | if (need_reinitialize) { | ||
754 | re_initialize(); | 836 | re_initialize(); |
755 | goto restart; | 837 | goto restart; |
756 | } | 838 | } |
757 | sleep(interval_sec); | 839 | sleep(interval_sec); |
758 | get_counters(cnt_odd); | 840 | if (get_counters(cnt_odd)) { |
841 | re_initialize(); | ||
842 | goto restart; | ||
843 | } | ||
759 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 844 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
760 | |||
761 | compute_delta(cnt_odd, cnt_even, cnt_delta); | 845 | compute_delta(cnt_odd, cnt_even, cnt_delta); |
762 | timersub(&tv_odd, &tv_even, &tv_delta); | 846 | timersub(&tv_odd, &tv_even, &tv_delta); |
763 | compute_average(cnt_delta, cnt_average); | 847 | compute_average(cnt_delta, cnt_average); |
764 | print_counters(cnt_delta); | 848 | print_counters(cnt_delta); |
765 | if (need_reinitialize) { | 849 | sleep(interval_sec); |
850 | if (get_counters(cnt_even)) { | ||
766 | re_initialize(); | 851 | re_initialize(); |
767 | goto restart; | 852 | goto restart; |
768 | } | 853 | } |
769 | sleep(interval_sec); | ||
770 | get_counters(cnt_even); | ||
771 | gettimeofday(&tv_even, (struct timezone *)NULL); | 854 | gettimeofday(&tv_even, (struct timezone *)NULL); |
772 | compute_delta(cnt_even, cnt_odd, cnt_delta); | 855 | compute_delta(cnt_even, cnt_odd, cnt_delta); |
773 | timersub(&tv_even, &tv_odd, &tv_delta); | 856 | timersub(&tv_even, &tv_odd, &tv_delta); |
@@ -953,6 +1036,7 @@ void turbostat_init() | |||
953 | check_super_user(); | 1036 | check_super_user(); |
954 | 1037 | ||
955 | num_cpus = for_all_cpus(alloc_new_counters); | 1038 | num_cpus = for_all_cpus(alloc_new_counters); |
1039 | cpu_mask_init(num_cpus); | ||
956 | 1040 | ||
957 | if (verbose) | 1041 | if (verbose) |
958 | print_nehalem_info(); | 1042 | print_nehalem_info(); |
@@ -1005,8 +1089,11 @@ void cmdline(int argc, char **argv) | |||
1005 | 1089 | ||
1006 | progname = argv[0]; | 1090 | progname = argv[0]; |
1007 | 1091 | ||
1008 | while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { | 1092 | while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { |
1009 | switch (opt) { | 1093 | switch (opt) { |
1094 | case 's': | ||
1095 | summary_only++; | ||
1096 | break; | ||
1010 | case 'v': | 1097 | case 'v': |
1011 | verbose++; | 1098 | verbose++; |
1012 | break; | 1099 | break; |