aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-26 17:28:55 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-26 17:28:55 -0400
commit476525004ac7e2f990b6956efcd44d0780c2ab4c (patch)
tree158cd2bbfb232b4f4327b6c20a4e14c6b095a438 /tools
parentbd22dc17e49973d3d4925970260e9e37f7580a9f (diff)
parentec033d0a02901551346b9f43f8ff9bad51378891 (diff)
Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull ACPI & power management update from Len Brown: "Re-write of the turbostat tool: lower overhead was necessary for measuring very large systems when they are very idle. IVB support in intel_idle: it's what I run on my IVB; others should be able to also :-) ACPICA core update: we have found some bugs due to divergence between Linux and the upstream ACPICA base. Most of these patches are to reduce that divergence to reduce the risk of future bugs. Some cpuidle updates, mostly for non-Intel; more will be coming, as they depend on this part. Some thermal management changes needed by non-ACPI systems. Some _OST (OS Status Indication) updates for ACPI hot-plug." * 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (51 commits) Thermal: Documentation update Thermal: Add Hysteresis attributes Thermal: Make Thermal trip points writeable ACPI/AC: prevent OOPS on some boxes due to missing check power_supply_register() return value check tools/power: turbostat: fix large c1% issue tools/power: turbostat v2 - re-write for efficiency ACPICA: Update to version 20120711 ACPICA: AcpiSrc: Fix some translation issues for Linux conversion ACPICA: Update header files copyrights to 2012 ACPICA: Add new ACPI table load/unload external interfaces ACPICA: Split file: tbxface.c -> tbxfload.c ACPICA: Add PCC address space to space ID decode function ACPICA: Fix some comment fields ACPICA: Table manager: deploy new firmware error/warning interfaces ACPICA: Add new interfaces for BIOS(firmware) errors and warnings ACPICA: Split exception code utilities to a new file, utexcep.c ACPI: acpi_pad: tune round_robin_time ACPICA: Update to version 20120620 ACPICA: Add support for implicit notify on multiple devices ACPICA: Update comments; no functional change ...
Diffstat (limited to 'tools')
-rw-r--r--tools/power/x86/turbostat/Makefile1
-rw-r--r--tools/power/x86/turbostat/turbostat.877
-rw-r--r--tools/power/x86/turbostat/turbostat.c1333
3 files changed, 872 insertions, 539 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index fd8e1f1297aa..f85649554191 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,4 +1,5 @@
1turbostat : turbostat.c 1turbostat : turbostat.c
2CFLAGS += -Wall
2 3
3clean : 4clean :
4 rm -f turbostat 5 rm -f turbostat
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index adf175f61496..74e44507dfe9 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -27,7 +27,11 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs.
27on processors that additionally support C-state residency counters. 27on processors that additionally support C-state residency counters.
28 28
29.SS Options 29.SS Options
30The \fB-s\fP option prints only a 1-line summary for each sample interval. 30The \fB-s\fP option limits output to a 1-line system summary for each interval.
31.PP
32The \fB-c\fP option limits output to the 1st thread in each core.
33.PP
34The \fB-p\fP option limits output to the 1st thread in each package.
31.PP 35.PP
32The \fB-v\fP option increases verbosity. 36The \fB-v\fP option increases verbosity.
33.PP 37.PP
@@ -65,19 +69,19 @@ Subsequent rows show per-CPU statistics.
65.nf 69.nf
66[root@x980]# ./turbostat 70[root@x980]# ./turbostat
67cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 71cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
68 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 72 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61
69 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 73 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61
70 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 74 0 6 0.05 1.62 3.38 10.34
71 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 75 1 2 0.03 1.62 3.38 0.07 0.05 99.86
72 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 76 1 8 0.03 1.62 3.38 0.06
73 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 77 2 4 0.21 1.62 3.38 0.10 1.49 98.21
74 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 78 2 10 0.02 1.62 3.38 0.29
75 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 79 8 1 0.04 1.62 3.38 0.04 0.08 99.84
76 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 80 8 7 0.01 1.62 3.38 0.06
77 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 81 9 3 0.53 1.62 3.38 0.10 0.20 99.17
78 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 82 9 9 0.02 1.62 3.38 0.60
79 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 83 10 5 0.01 1.62 3.38 0.02 0.04 99.92
80 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 84 10 11 0.02 1.62 3.38 0.02
81.fi 85.fi
82.SH SUMMARY EXAMPLE 86.SH SUMMARY EXAMPLE
83The "-s" option prints the column headers just once, 87The "-s" option prints the column headers just once,
@@ -86,9 +90,10 @@ and then the one line system summary for each sample interval.
86.nf 90.nf
87[root@x980]# ./turbostat -s 91[root@x980]# ./turbostat -s
88 %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 92 %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
89 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 93 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12
90 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 94 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60
91 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 95 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36
96 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90
92.fi 97.fi
93.SH VERBOSE EXAMPLE 98.SH VERBOSE EXAMPLE
94The "-v" option adds verbosity to the output: 99The "-v" option adds verbosity to the output:
@@ -120,30 +125,28 @@ until ^C while the other CPUs are mostly idle:
120[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null 125[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
121^C 126^C
122cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 127cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
123 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 128 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00
124 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 129 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00
125 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 130 0 6 0.21 3.06 3.38 18.09
126 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 131 1 2 0.53 3.33 3.38 2.80 46.40 50.27
127 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 132 1 8 0.89 3.47 3.38 2.44
128 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 133 2 4 1.36 3.43 3.38 9.04 23.71 65.89
129 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 134 2 10 0.18 2.86 3.38 10.22
130 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 135 8 1 0.04 2.87 3.38 99.96 0.01 0.00
131 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 136 8 7 99.72 3.63 3.38 0.27
132 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 137 9 3 0.31 3.21 3.38 7.64 56.55 35.50
133 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 138 9 9 0.08 2.95 3.38 7.88
134 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 139 10 5 1.42 3.43 3.38 2.14 30.99 65.44
135 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 140 10 11 0.16 2.88 3.38 3.40
1364.907015 sec
137
138.fi 141.fi
139Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit 142Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit
140while the other processors are generally in various states of idle. 143while the other processors are generally in various states of idle.
141 144
142Note that cpu0 is an HT sibling sharing core0 145Note that cpu1 and cpu7 are HT siblings within core8.
143with cpu6, and thus it is unable to get to an idle state 146As cpu7 is very busy, it prevents its sibling, cpu1,
144deeper than c1 while cpu6 is busy. 147from entering a c-state deeper than c1.
145 148
146Note that turbostat reports average GHz of 3.64, while 149Note that turbostat reports average GHz of 3.63, while
147the arithmetic average of the GHz column above is lower. 150the arithmetic average of the GHz column above is lower.
148This is a weighted average, where the weight is %c0. ie. it is the total number of 151This is a weighted average, where the weight is %c0. ie. it is the total number of
149un-halted cycles elapsed per time divided by the number of CPUs. 152un-halted cycles elapsed per time divided by the number of CPUs.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 16de7ad4850f..861d77190206 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -67,92 +67,119 @@ double bclk;
67unsigned int show_pkg; 67unsigned int show_pkg;
68unsigned int show_core; 68unsigned int show_core;
69unsigned int show_cpu; 69unsigned int show_cpu;
70unsigned int show_pkg_only;
71unsigned int show_core_only;
72char *output_buffer, *outp;
70 73
71int aperf_mperf_unstable; 74int aperf_mperf_unstable;
72int backwards_count; 75int backwards_count;
73char *progname; 76char *progname;
74 77
75int num_cpus; 78cpu_set_t *cpu_present_set, *cpu_affinity_set;
76cpu_set_t *cpu_present_set, *cpu_mask; 79size_t cpu_present_setsize, cpu_affinity_setsize;
77size_t cpu_present_setsize, cpu_mask_size; 80
78 81struct thread_data {
79struct counters { 82 unsigned long long tsc;
80 unsigned long long tsc; /* per thread */ 83 unsigned long long aperf;
81 unsigned long long aperf; /* per thread */ 84 unsigned long long mperf;
82 unsigned long long mperf; /* per thread */ 85 unsigned long long c1; /* derived */
83 unsigned long long c1; /* per thread (calculated) */ 86 unsigned long long extra_msr;
84 unsigned long long c3; /* per core */ 87 unsigned int cpu_id;
85 unsigned long long c6; /* per core */ 88 unsigned int flags;
86 unsigned long long c7; /* per core */ 89#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
87 unsigned long long pc2; /* per package */ 90#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
88 unsigned long long pc3; /* per package */ 91} *thread_even, *thread_odd;
89 unsigned long long pc6; /* per package */ 92
90 unsigned long long pc7; /* per package */ 93struct core_data {
91 unsigned long long extra_msr; /* per thread */ 94 unsigned long long c3;
92 int pkg; 95 unsigned long long c6;
93 int core; 96 unsigned long long c7;
94 int cpu; 97 unsigned int core_id;
95 struct counters *next; 98} *core_even, *core_odd;
96}; 99
97 100struct pkg_data {
98struct counters *cnt_even; 101 unsigned long long pc2;
99struct counters *cnt_odd; 102 unsigned long long pc3;
100struct counters *cnt_delta; 103 unsigned long long pc6;
101struct counters *cnt_average; 104 unsigned long long pc7;
102struct timeval tv_even; 105 unsigned int package_id;
103struct timeval tv_odd; 106} *package_even, *package_odd;
104struct timeval tv_delta; 107
105 108#define ODD_COUNTERS thread_odd, core_odd, package_odd
106int mark_cpu_present(int pkg, int core, int cpu) 109#define EVEN_COUNTERS thread_even, core_even, package_even
110
111#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
112 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
113 topo.num_threads_per_core + \
114 (core_no) * topo.num_threads_per_core + (thread_no))
115#define GET_CORE(core_base, core_no, pkg_no) \
116 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
117#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
118
119struct system_summary {
120 struct thread_data threads;
121 struct core_data cores;
122 struct pkg_data packages;
123} sum, average;
124
125
126struct topo_params {
127 int num_packages;
128 int num_cpus;
129 int num_cores;
130 int max_cpu_num;
131 int num_cores_per_pkg;
132 int num_threads_per_core;
133} topo;
134
135struct timeval tv_even, tv_odd, tv_delta;
136
137void setup_all_buffers(void);
138
139int cpu_is_not_present(int cpu)
107{ 140{
108 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 141 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
109 return 0;
110} 142}
111
112/* 143/*
113 * cpu_mask_init(ncpus) 144 * run func(thread, core, package) in topology order
114 * 145 * skip non-present cpus
115 * allocate and clear cpu_mask
116 * set cpu_mask_size
117 */ 146 */
118void cpu_mask_init(int ncpus) 147
148int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
149 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
119{ 150{
120 cpu_mask = CPU_ALLOC(ncpus); 151 int retval, pkg_no, core_no, thread_no;
121 if (cpu_mask == NULL) {
122 perror("CPU_ALLOC");
123 exit(3);
124 }
125 cpu_mask_size = CPU_ALLOC_SIZE(ncpus);
126 CPU_ZERO_S(cpu_mask_size, cpu_mask);
127 152
128 /* 153 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
129 * Allocate and initialize cpu_present_set 154 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
130 */ 155 for (thread_no = 0; thread_no <
131 cpu_present_set = CPU_ALLOC(ncpus); 156 topo.num_threads_per_core; ++thread_no) {
132 if (cpu_present_set == NULL) { 157 struct thread_data *t;
133 perror("CPU_ALLOC"); 158 struct core_data *c;
134 exit(3); 159 struct pkg_data *p;
135 }
136 cpu_present_setsize = CPU_ALLOC_SIZE(ncpus);
137 CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
138 for_all_cpus(mark_cpu_present);
139}
140 160
141void cpu_mask_uninit() 161 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
142{ 162
143 CPU_FREE(cpu_mask); 163 if (cpu_is_not_present(t->cpu_id))
144 cpu_mask = NULL; 164 continue;
145 cpu_mask_size = 0; 165
146 CPU_FREE(cpu_present_set); 166 c = GET_CORE(core_base, core_no, pkg_no);
147 cpu_present_set = NULL; 167 p = GET_PKG(pkg_base, pkg_no);
148 cpu_present_setsize = 0; 168
169 retval = func(t, c, p);
170 if (retval)
171 return retval;
172 }
173 }
174 }
175 return 0;
149} 176}
150 177
151int cpu_migrate(int cpu) 178int cpu_migrate(int cpu)
152{ 179{
153 CPU_ZERO_S(cpu_mask_size, cpu_mask); 180 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
154 CPU_SET_S(cpu, cpu_mask_size, cpu_mask); 181 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
155 if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) 182 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
156 return -1; 183 return -1;
157 else 184 else
158 return 0; 185 return 0;
@@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
181void print_header(void) 208void print_header(void)
182{ 209{
183 if (show_pkg) 210 if (show_pkg)
184 fprintf(stderr, "pk"); 211 outp += sprintf(outp, "pk");
185 if (show_pkg) 212 if (show_pkg)
186 fprintf(stderr, " "); 213 outp += sprintf(outp, " ");
187 if (show_core) 214 if (show_core)
188 fprintf(stderr, "cor"); 215 outp += sprintf(outp, "cor");
189 if (show_cpu) 216 if (show_cpu)
190 fprintf(stderr, " CPU"); 217 outp += sprintf(outp, " CPU");
191 if (show_pkg || show_core || show_cpu) 218 if (show_pkg || show_core || show_cpu)
192 fprintf(stderr, " "); 219 outp += sprintf(outp, " ");
193 if (do_nhm_cstates) 220 if (do_nhm_cstates)
194 fprintf(stderr, " %%c0"); 221 outp += sprintf(outp, " %%c0");
195 if (has_aperf) 222 if (has_aperf)
196 fprintf(stderr, " GHz"); 223 outp += sprintf(outp, " GHz");
197 fprintf(stderr, " TSC"); 224 outp += sprintf(outp, " TSC");
198 if (do_nhm_cstates) 225 if (do_nhm_cstates)
199 fprintf(stderr, " %%c1"); 226 outp += sprintf(outp, " %%c1");
200 if (do_nhm_cstates) 227 if (do_nhm_cstates)
201 fprintf(stderr, " %%c3"); 228 outp += sprintf(outp, " %%c3");
202 if (do_nhm_cstates) 229 if (do_nhm_cstates)
203 fprintf(stderr, " %%c6"); 230 outp += sprintf(outp, " %%c6");
204 if (do_snb_cstates) 231 if (do_snb_cstates)
205 fprintf(stderr, " %%c7"); 232 outp += sprintf(outp, " %%c7");
206 if (do_snb_cstates) 233 if (do_snb_cstates)
207 fprintf(stderr, " %%pc2"); 234 outp += sprintf(outp, " %%pc2");
208 if (do_nhm_cstates) 235 if (do_nhm_cstates)
209 fprintf(stderr, " %%pc3"); 236 outp += sprintf(outp, " %%pc3");
210 if (do_nhm_cstates) 237 if (do_nhm_cstates)
211 fprintf(stderr, " %%pc6"); 238 outp += sprintf(outp, " %%pc6");
212 if (do_snb_cstates) 239 if (do_snb_cstates)
213 fprintf(stderr, " %%pc7"); 240 outp += sprintf(outp, " %%pc7");
214 if (extra_msr_offset) 241 if (extra_msr_offset)
215 fprintf(stderr, " MSR 0x%x ", extra_msr_offset); 242 outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset);
216 243
217 putc('\n', stderr); 244 outp += sprintf(outp, "\n");
218} 245}
219 246
220void dump_cnt(struct counters *cnt) 247int dump_counters(struct thread_data *t, struct core_data *c,
248 struct pkg_data *p)
221{ 249{
222 if (!cnt) 250 fprintf(stderr, "t %p, c %p, p %p\n", t, c, p);
223 return; 251
224 if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); 252 if (t) {
225 if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); 253 fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
226 if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); 254 fprintf(stderr, "TSC: %016llX\n", t->tsc);
227 if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); 255 fprintf(stderr, "aperf: %016llX\n", t->aperf);
228 if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); 256 fprintf(stderr, "mperf: %016llX\n", t->mperf);
229 if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); 257 fprintf(stderr, "c1: %016llX\n", t->c1);
230 if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); 258 fprintf(stderr, "msr0x%x: %016llX\n",
231 if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); 259 extra_msr_offset, t->extra_msr);
232 if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); 260 }
233 if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3);
234 if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6);
235 if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7);
236 if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr);
237}
238 261
239void dump_list(struct counters *cnt) 262 if (c) {
240{ 263 fprintf(stderr, "core: %d\n", c->core_id);
241 printf("dump_list 0x%p\n", cnt); 264 fprintf(stderr, "c3: %016llX\n", c->c3);
265 fprintf(stderr, "c6: %016llX\n", c->c6);
266 fprintf(stderr, "c7: %016llX\n", c->c7);
267 }
242 268
243 for (; cnt; cnt = cnt->next) 269 if (p) {
244 dump_cnt(cnt); 270 fprintf(stderr, "package: %d\n", p->package_id);
271 fprintf(stderr, "pc2: %016llX\n", p->pc2);
272 fprintf(stderr, "pc3: %016llX\n", p->pc3);
273 fprintf(stderr, "pc6: %016llX\n", p->pc6);
274 fprintf(stderr, "pc7: %016llX\n", p->pc7);
275 }
276 return 0;
245} 277}
246 278
247/* 279/*
@@ -253,321 +285,389 @@ void dump_list(struct counters *cnt)
253 * TSC: "TSC" 3 columns %3.2 285 * TSC: "TSC" 3 columns %3.2
254 * percentage " %pc3" %6.2 286 * percentage " %pc3" %6.2
255 */ 287 */
256void print_cnt(struct counters *p) 288int format_counters(struct thread_data *t, struct core_data *c,
289 struct pkg_data *p)
257{ 290{
258 double interval_float; 291 double interval_float;
259 292
293 /* if showing only 1st thread in core and this isn't one, bail out */
294 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
295 return 0;
296
297 /* if showing only 1st thread in pkg and this isn't one, bail out */
298 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
299 return 0;
300
260 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 301 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
261 302
262 /* topology columns, print blanks on 1st (average) line */ 303 /* topo columns, print blanks on 1st (average) line */
263 if (p == cnt_average) { 304 if (t == &average.threads) {
264 if (show_pkg) 305 if (show_pkg)
265 fprintf(stderr, " "); 306 outp += sprintf(outp, " ");
266 if (show_pkg && show_core) 307 if (show_pkg && show_core)
267 fprintf(stderr, " "); 308 outp += sprintf(outp, " ");
268 if (show_core) 309 if (show_core)
269 fprintf(stderr, " "); 310 outp += sprintf(outp, " ");
270 if (show_cpu) 311 if (show_cpu)
271 fprintf(stderr, " " " "); 312 outp += sprintf(outp, " " " ");
272 } else { 313 } else {
273 if (show_pkg) 314 if (show_pkg) {
274 fprintf(stderr, "%2d", p->pkg); 315 if (p)
316 outp += sprintf(outp, "%2d", p->package_id);
317 else
318 outp += sprintf(outp, " ");
319 }
275 if (show_pkg && show_core) 320 if (show_pkg && show_core)
276 fprintf(stderr, " "); 321 outp += sprintf(outp, " ");
277 if (show_core) 322 if (show_core) {
278 fprintf(stderr, "%3d", p->core); 323 if (c)
324 outp += sprintf(outp, "%3d", c->core_id);
325 else
326 outp += sprintf(outp, " ");
327 }
279 if (show_cpu) 328 if (show_cpu)
280 fprintf(stderr, " %3d", p->cpu); 329 outp += sprintf(outp, " %3d", t->cpu_id);
281 } 330 }
282 331
283 /* %c0 */ 332 /* %c0 */
284 if (do_nhm_cstates) { 333 if (do_nhm_cstates) {
285 if (show_pkg || show_core || show_cpu) 334 if (show_pkg || show_core || show_cpu)
286 fprintf(stderr, " "); 335 outp += sprintf(outp, " ");
287 if (!skip_c0) 336 if (!skip_c0)
288 fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); 337 outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);
289 else 338 else
290 fprintf(stderr, " ****"); 339 outp += sprintf(outp, " ****");
291 } 340 }
292 341
293 /* GHz */ 342 /* GHz */
294 if (has_aperf) { 343 if (has_aperf) {
295 if (!aperf_mperf_unstable) { 344 if (!aperf_mperf_unstable) {
296 fprintf(stderr, " %3.2f", 345 outp += sprintf(outp, " %3.2f",
297 1.0 * p->tsc / units * p->aperf / 346 1.0 * t->tsc / units * t->aperf /
298 p->mperf / interval_float); 347 t->mperf / interval_float);
299 } else { 348 } else {
300 if (p->aperf > p->tsc || p->mperf > p->tsc) { 349 if (t->aperf > t->tsc || t->mperf > t->tsc) {
301 fprintf(stderr, " ***"); 350 outp += sprintf(outp, " ***");
302 } else { 351 } else {
303 fprintf(stderr, "%3.1f*", 352 outp += sprintf(outp, "%3.1f*",
304 1.0 * p->tsc / 353 1.0 * t->tsc /
305 units * p->aperf / 354 units * t->aperf /
306 p->mperf / interval_float); 355 t->mperf / interval_float);
307 } 356 }
308 } 357 }
309 } 358 }
310 359
311 /* TSC */ 360 /* TSC */
312 fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); 361 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
313 362
314 if (do_nhm_cstates) { 363 if (do_nhm_cstates) {
315 if (!skip_c1) 364 if (!skip_c1)
316 fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); 365 outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
317 else 366 else
318 fprintf(stderr, " ****"); 367 outp += sprintf(outp, " ****");
319 } 368 }
369
370 /* print per-core data only for 1st thread in core */
371 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
372 goto done;
373
320 if (do_nhm_cstates) 374 if (do_nhm_cstates)
321 fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); 375 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
322 if (do_nhm_cstates) 376 if (do_nhm_cstates)
323 fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); 377 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
324 if (do_snb_cstates) 378 if (do_snb_cstates)
325 fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); 379 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
380
381 /* print per-package data only for 1st core in package */
382 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
383 goto done;
384
326 if (do_snb_cstates) 385 if (do_snb_cstates)
327 fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); 386 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
328 if (do_nhm_cstates) 387 if (do_nhm_cstates)
329 fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); 388 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
330 if (do_nhm_cstates) 389 if (do_nhm_cstates)
331 fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); 390 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
332 if (do_snb_cstates) 391 if (do_snb_cstates)
333 fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); 392 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
393done:
334 if (extra_msr_offset) 394 if (extra_msr_offset)
335 fprintf(stderr, " 0x%016llx", p->extra_msr); 395 outp += sprintf(outp, " 0x%016llx", t->extra_msr);
336 putc('\n', stderr); 396 outp += sprintf(outp, "\n");
397
398 return 0;
337} 399}
338 400
339void print_counters(struct counters *counters) 401void flush_stdout()
402{
403 fputs(output_buffer, stdout);
404 outp = output_buffer;
405}
406void flush_stderr()
407{
408 fputs(output_buffer, stderr);
409 outp = output_buffer;
410}
411void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
340{ 412{
341 struct counters *cnt;
342 static int printed; 413 static int printed;
343 414
344
345 if (!printed || !summary_only) 415 if (!printed || !summary_only)
346 print_header(); 416 print_header();
347 417
348 if (num_cpus > 1) 418 if (topo.num_cpus > 1)
349 print_cnt(cnt_average); 419 format_counters(&average.threads, &average.cores,
420 &average.packages);
350 421
351 printed = 1; 422 printed = 1;
352 423
353 if (summary_only) 424 if (summary_only)
354 return; 425 return;
355 426
356 for (cnt = counters; cnt != NULL; cnt = cnt->next) 427 for_all_cpus(format_counters, t, c, p);
357 print_cnt(cnt);
358
359} 428}
360 429
361#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) 430void
431delta_package(struct pkg_data *new, struct pkg_data *old)
432{
433 old->pc2 = new->pc2 - old->pc2;
434 old->pc3 = new->pc3 - old->pc3;
435 old->pc6 = new->pc6 - old->pc6;
436 old->pc7 = new->pc7 - old->pc7;
437}
362 438
363int compute_delta(struct counters *after, 439void
364 struct counters *before, struct counters *delta) 440delta_core(struct core_data *new, struct core_data *old)
365{ 441{
366 int errors = 0; 442 old->c3 = new->c3 - old->c3;
367 int perf_err = 0; 443 old->c6 = new->c6 - old->c6;
444 old->c7 = new->c7 - old->c7;
445}
368 446
369 skip_c0 = skip_c1 = 0; 447/*
448 * old = new - old
449 */
450void
451delta_thread(struct thread_data *new, struct thread_data *old,
452 struct core_data *core_delta)
453{
454 old->tsc = new->tsc - old->tsc;
455
456 /* check for TSC < 1 Mcycles over interval */
457 if (old->tsc < (1000 * 1000)) {
458 fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n");
459 fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n");
460 fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n");
461 exit(-3);
462 }
370 463
371 for ( ; after && before && delta; 464 old->c1 = new->c1 - old->c1;
372 after = after->next, before = before->next, delta = delta->next) {
373 if (before->cpu != after->cpu) {
374 printf("cpu configuration changed: %d != %d\n",
375 before->cpu, after->cpu);
376 return -1;
377 }
378 465
379 if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { 466 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
380 fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", 467 old->aperf = new->aperf - old->aperf;
381 before->cpu, before->tsc, after->tsc); 468 old->mperf = new->mperf - old->mperf;
382 errors++; 469 } else {
383 }
384 /* check for TSC < 1 Mcycles over interval */
385 if (delta->tsc < (1000 * 1000)) {
386 fprintf(stderr, "Insanely slow TSC rate,"
387 " TSC stops in idle?\n");
388 fprintf(stderr, "You can disable all c-states"
389 " by booting with \"idle=poll\"\n");
390 fprintf(stderr, "or just the deep ones with"
391 " \"processor.max_cstate=1\"\n");
392 exit(-3);
393 }
394 if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
395 fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
396 before->cpu, before->c3, after->c3);
397 errors++;
398 }
399 if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
400 fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
401 before->cpu, before->c6, after->c6);
402 errors++;
403 }
404 if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
405 fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
406 before->cpu, before->c7, after->c7);
407 errors++;
408 }
409 if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
410 fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
411 before->cpu, before->pc2, after->pc2);
412 errors++;
413 }
414 if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
415 fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
416 before->cpu, before->pc3, after->pc3);
417 errors++;
418 }
419 if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
420 fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
421 before->cpu, before->pc6, after->pc6);
422 errors++;
423 }
424 if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
425 fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
426 before->cpu, before->pc7, after->pc7);
427 errors++;
428 }
429 470
430 perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); 471 if (!aperf_mperf_unstable) {
431 if (perf_err) { 472 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
432 fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", 473 fprintf(stderr, "* Frequency results do not cover entire interval *\n");
433 before->cpu, before->aperf, after->aperf); 474 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
434 }
435 perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
436 if (perf_err) {
437 fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
438 before->cpu, before->mperf, after->mperf);
439 }
440 if (perf_err) {
441 if (!aperf_mperf_unstable) {
442 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
443 fprintf(stderr, "* Frequency results do not cover entire interval *\n");
444 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
445 475
446 aperf_mperf_unstable = 1; 476 aperf_mperf_unstable = 1;
447 }
448 /*
449 * mperf delta is likely a huge "positive" number
450 * can not use it for calculating c0 time
451 */
452 skip_c0 = 1;
453 skip_c1 = 1;
454 } 477 }
455
456 /* 478 /*
457 * As mperf and tsc collection are not atomic, 479 * mperf delta is likely a huge "positive" number
458 * it is possible for mperf's non-halted cycles 480 * can not use it for calculating c0 time
459 * to exceed TSC's all cycles: show c1 = 0% in that case.
460 */ 481 */
461 if (delta->mperf > delta->tsc) 482 skip_c0 = 1;
462 delta->c1 = 0; 483 skip_c1 = 1;
463 else /* normal case, derive c1 */ 484 }
464 delta->c1 = delta->tsc - delta->mperf
465 - delta->c3 - delta->c6 - delta->c7;
466 485
467 if (delta->mperf == 0)
468 delta->mperf = 1; /* divide by 0 protection */
469 486
470 /* 487 /*
471 * for "extra msr", just copy the latest w/o subtracting 488 * As counter collection is not atomic,
472 */ 489 * it is possible for mperf's non-halted cycles + idle states
473 delta->extra_msr = after->extra_msr; 490 * to exceed TSC's all cycles: show c1 = 0% in that case.
474 if (errors) { 491 */
475 fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); 492 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
476 dump_cnt(before); 493 old->c1 = 0;
477 fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); 494 else {
478 dump_cnt(after); 495 /* normal case, derive c1 */
479 errors = 0; 496 old->c1 = old->tsc - old->mperf - core_delta->c3
480 } 497 - core_delta->c6 - core_delta->c7;
481 } 498 }
499
500 if (old->mperf == 0) {
501 if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
502 old->mperf = 1; /* divide by 0 protection */
503 }
504
505 /*
506 * for "extra msr", just copy the latest w/o subtracting
507 */
508 old->extra_msr = new->extra_msr;
509}
510
511int delta_cpu(struct thread_data *t, struct core_data *c,
512 struct pkg_data *p, struct thread_data *t2,
513 struct core_data *c2, struct pkg_data *p2)
514{
515 /* calculate core delta only for 1st thread in core */
516 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
517 delta_core(c, c2);
518
519 /* always calculate thread delta */
520 delta_thread(t, t2, c2); /* c2 is core delta */
521
522 /* calculate package delta only for 1st core in package */
523 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
524 delta_package(p, p2);
525
482 return 0; 526 return 0;
483} 527}
484 528
485void compute_average(struct counters *delta, struct counters *avg) 529void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
530{
531 t->tsc = 0;
532 t->aperf = 0;
533 t->mperf = 0;
534 t->c1 = 0;
535
536 /* tells format_counters to dump all fields from this set */
537 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
538
539 c->c3 = 0;
540 c->c6 = 0;
541 c->c7 = 0;
542
543 p->pc2 = 0;
544 p->pc3 = 0;
545 p->pc6 = 0;
546 p->pc7 = 0;
547}
548int sum_counters(struct thread_data *t, struct core_data *c,
549 struct pkg_data *p)
486{ 550{
487 struct counters *sum; 551 average.threads.tsc += t->tsc;
552 average.threads.aperf += t->aperf;
553 average.threads.mperf += t->mperf;
554 average.threads.c1 += t->c1;
488 555
489 sum = calloc(1, sizeof(struct counters)); 556 /* sum per-core values only for 1st thread in core */
490 if (sum == NULL) { 557 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
491 perror("calloc sum"); 558 return 0;
492 exit(1);
493 }
494 559
495 for (; delta; delta = delta->next) { 560 average.cores.c3 += c->c3;
496 sum->tsc += delta->tsc; 561 average.cores.c6 += c->c6;
497 sum->c1 += delta->c1; 562 average.cores.c7 += c->c7;
498 sum->c3 += delta->c3; 563
499 sum->c6 += delta->c6; 564 /* sum per-pkg values only for 1st core in pkg */
500 sum->c7 += delta->c7; 565 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
501 sum->aperf += delta->aperf; 566 return 0;
502 sum->mperf += delta->mperf; 567
503 sum->pc2 += delta->pc2; 568 average.packages.pc2 += p->pc2;
504 sum->pc3 += delta->pc3; 569 average.packages.pc3 += p->pc3;
505 sum->pc6 += delta->pc6; 570 average.packages.pc6 += p->pc6;
506 sum->pc7 += delta->pc7; 571 average.packages.pc7 += p->pc7;
507 } 572
508 avg->tsc = sum->tsc/num_cpus; 573 return 0;
509 avg->c1 = sum->c1/num_cpus; 574}
510 avg->c3 = sum->c3/num_cpus; 575/*
511 avg->c6 = sum->c6/num_cpus; 576 * sum the counters for all cpus in the system
512 avg->c7 = sum->c7/num_cpus; 577 * compute the weighted average
513 avg->aperf = sum->aperf/num_cpus; 578 */
514 avg->mperf = sum->mperf/num_cpus; 579void compute_average(struct thread_data *t, struct core_data *c,
515 avg->pc2 = sum->pc2/num_cpus; 580 struct pkg_data *p)
516 avg->pc3 = sum->pc3/num_cpus; 581{
517 avg->pc6 = sum->pc6/num_cpus; 582 clear_counters(&average.threads, &average.cores, &average.packages);
518 avg->pc7 = sum->pc7/num_cpus; 583
519 584 for_all_cpus(sum_counters, t, c, p);
520 free(sum); 585
586 average.threads.tsc /= topo.num_cpus;
587 average.threads.aperf /= topo.num_cpus;
588 average.threads.mperf /= topo.num_cpus;
589 average.threads.c1 /= topo.num_cpus;
590
591 average.cores.c3 /= topo.num_cores;
592 average.cores.c6 /= topo.num_cores;
593 average.cores.c7 /= topo.num_cores;
594
595 average.packages.pc2 /= topo.num_packages;
596 average.packages.pc3 /= topo.num_packages;
597 average.packages.pc6 /= topo.num_packages;
598 average.packages.pc7 /= topo.num_packages;
521} 599}
522 600
/*
 * rdtsc()
 * Execute the x86 RDTSC instruction on the current cpu and return the
 * 64-bit result assembled from its two 32-bit halves (EAX low, EDX high).
 *
 * Spelled __asm__ __volatile__ so the block also builds when the compiler
 * is in a strict ISO C mode, where the bare "asm" keyword is unavailable.
 */
static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	return low | ((unsigned long long)high) << 32;
}
532 609
533 if (has_aperf) {
534 if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf))
535 return -1;
536 if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf))
537 return -1;
538 }
539 610
540 if (do_nhm_cstates) { 611/*
541 if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) 612 * get_counters(...)
542 return -1; 613 * migrate to cpu
543 if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) 614 * acquire and record local counters for that cpu
544 return -1; 615 */
545 } 616int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
617{
618 int cpu = t->cpu_id;
546 619
547 if (do_snb_cstates) 620 if (cpu_migrate(cpu))
548 if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) 621 return -1;
549 return -1;
550 622
551 if (do_nhm_cstates) { 623 t->tsc = rdtsc(); /* we are running on local CPU of interest */
552 if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) 624
553 return -1; 625 if (has_aperf) {
554 if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) 626 if (get_msr(cpu, MSR_APERF, &t->aperf))
555 return -1; 627 return -3;
556 } 628 if (get_msr(cpu, MSR_MPERF, &t->mperf))
557 if (do_snb_cstates) { 629 return -4;
558 if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) 630 }
559 return -1; 631
560 if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) 632 if (extra_msr_offset)
561 return -1; 633 if (get_msr(cpu, extra_msr_offset, &t->extra_msr))
562 } 634 return -5;
563 if (extra_msr_offset) 635
564 if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) 636 /* collect core counters only for 1st thread in core */
565 return -1; 637 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
638 return 0;
639
640 if (do_nhm_cstates) {
641 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
642 return -6;
643 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
644 return -7;
645 }
646
647 if (do_snb_cstates)
648 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
649 return -8;
650
651 /* collect package counters only for 1st core in package */
652 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
653 return 0;
654
655 if (do_nhm_cstates) {
656 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
657 return -9;
658 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
659 return -10;
660 }
661 if (do_snb_cstates) {
662 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
663 return -11;
664 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
665 return -12;
566 } 666 }
567 return 0; 667 return 0;
568} 668}
569 669
570void print_nehalem_info(void) 670void print_verbose_header(void)
571{ 671{
572 unsigned long long msr; 672 unsigned long long msr;
573 unsigned int ratio; 673 unsigned int ratio;
@@ -615,143 +715,82 @@ void print_nehalem_info(void)
615 715
616} 716}
617 717
618void free_counter_list(struct counters *list) 718void free_all_buffers(void)
619{ 719{
620 struct counters *p; 720 CPU_FREE(cpu_present_set);
721 cpu_present_set = NULL;
722 cpu_present_set = 0;
621 723
622 for (p = list; p; ) { 724 CPU_FREE(cpu_affinity_set);
623 struct counters *free_me; 725 cpu_affinity_set = NULL;
726 cpu_affinity_setsize = 0;
624 727
625 free_me = p; 728 free(thread_even);
626 p = p->next; 729 free(core_even);
627 free(free_me); 730 free(package_even);
628 }
629}
630 731
631void free_all_counters(void) 732 thread_even = NULL;
632{ 733 core_even = NULL;
633 free_counter_list(cnt_even); 734 package_even = NULL;
634 cnt_even = NULL;
635 735
636 free_counter_list(cnt_odd); 736 free(thread_odd);
637 cnt_odd = NULL; 737 free(core_odd);
738 free(package_odd);
638 739
639 free_counter_list(cnt_delta); 740 thread_odd = NULL;
640 cnt_delta = NULL; 741 core_odd = NULL;
742 package_odd = NULL;
641 743
642 free_counter_list(cnt_average); 744 free(output_buffer);
643 cnt_average = NULL; 745 output_buffer = NULL;
746 outp = NULL;
644} 747}
645 748
/*
 * cpu_is_first_sibling_in_core(cpu)
 * return 1 if given CPU is 1st HT sibling in the core
 *
 * Reads the first number in the cpu's sysfs thread_siblings_list and
 * compares it with cpu.  Exits with status 1 if the file cannot be
 * opened or does not start with a number, rather than comparing against
 * an uninitialized value.
 */
int cpu_is_first_sibling_in_core(int cpu)
{
	char path[80];	/* same size as the other sysfs topology helpers */
	FILE *filep;
	int first_cpu;
	int matches;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	matches = fscanf(filep, "%d", &first_cpu);
	fclose(filep);
	if (matches != 1) {
		fprintf(stderr, "%s: unable to parse first sibling\n", path);
		exit(1);
	}
	return (cpu == first_cpu);
}
702 769
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 *
 * Reads the first number in the cpu's sysfs core_siblings_list and
 * compares it with cpu.  Exits with status 1 if the file cannot be
 * opened or does not start with a number, rather than comparing against
 * an uninitialized value.
 */
int cpu_is_first_core_in_package(int cpu)
{
	char path[80];	/* same size as the other sysfs topology helpers */
	FILE *filep;
	int first_cpu;
	int matches;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	matches = fscanf(filep, "%d", &first_cpu);
	fclose(filep);
	if (matches != 1) {
		fprintf(stderr, "%s: unable to parse first sibling\n", path);
		exit(1);
	}
	return (cpu == first_cpu);
}
751 790
752int get_physical_package_id(int cpu) 791int get_physical_package_id(int cpu)
753{ 792{
754 char path[64]; 793 char path[80];
755 FILE *filep; 794 FILE *filep;
756 int pkg; 795 int pkg;
757 796
@@ -768,7 +807,7 @@ int get_physical_package_id(int cpu)
768 807
769int get_core_id(int cpu) 808int get_core_id(int cpu)
770{ 809{
771 char path[64]; 810 char path[80];
772 FILE *filep; 811 FILE *filep;
773 int core; 812 int core;
774 813
@@ -783,14 +822,87 @@ int get_core_id(int cpu)
783 return core; 822 return core;
784} 823}
785 824
/*
 * get_num_ht_siblings(cpu)
 * Report how many hardware threads share cpu's core, judged from the
 * start of its sysfs thread_siblings_list: returns 2 when the file
 * begins with two numbers separated by one character, otherwise 1.
 * Exits with status 1 if the file cannot be opened.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	FILE *filep;
	int sib1, sib2;
	int matches;
	char character;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}

	/*
	 * file format:
	 * a pair of numbers with a character between: 2 siblings (eg. 1-2, or 1,4)
	 * otherwise 1 sibling (self).
	 */
	matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);

	fclose(filep);

	return (matches == 3) ? 2 : 1;
}
853
786/* 854/*
787 * run func(pkg, core, cpu) on every cpu in /proc/stat 855 * run func(thread, core, package) in topology order
856 * skip non-present cpus
788 */ 857 */
789 858
790int for_all_cpus(void (func)(int, int, int)) 859int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
860 struct pkg_data *, struct thread_data *, struct core_data *,
861 struct pkg_data *), struct thread_data *thread_base,
862 struct core_data *core_base, struct pkg_data *pkg_base,
863 struct thread_data *thread_base2, struct core_data *core_base2,
864 struct pkg_data *pkg_base2)
865{
866 int retval, pkg_no, core_no, thread_no;
867
868 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
869 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
870 for (thread_no = 0; thread_no <
871 topo.num_threads_per_core; ++thread_no) {
872 struct thread_data *t, *t2;
873 struct core_data *c, *c2;
874 struct pkg_data *p, *p2;
875
876 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
877
878 if (cpu_is_not_present(t->cpu_id))
879 continue;
880
881 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
882
883 c = GET_CORE(core_base, core_no, pkg_no);
884 c2 = GET_CORE(core_base2, core_no, pkg_no);
885
886 p = GET_PKG(pkg_base, pkg_no);
887 p2 = GET_PKG(pkg_base2, pkg_no);
888
889 retval = func(t, c, p, t2, c2, p2);
890 if (retval)
891 return retval;
892 }
893 }
894 }
895 return 0;
896}
897
898/*
899 * run func(cpu) on every cpu in /proc/stat
900 * return max_cpu number
901 */
902int for_all_proc_cpus(int (func)(int))
791{ 903{
792 FILE *fp; 904 FILE *fp;
793 int cpu_count; 905 int cpu_num;
794 int retval; 906 int retval;
795 907
796 fp = fopen(proc_stat, "r"); 908 fp = fopen(proc_stat, "r");
@@ -805,78 +917,88 @@ int for_all_cpus(void (func)(int, int, int))
805 exit(1); 917 exit(1);
806 } 918 }
807 919
808 for (cpu_count = 0; ; cpu_count++) { 920 while (1) {
809 int cpu; 921 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
810
811 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
812 if (retval != 1) 922 if (retval != 1)
813 break; 923 break;
814 924
815 func(get_physical_package_id(cpu), get_core_id(cpu), cpu); 925 retval = func(cpu_num);
926 if (retval) {
927 fclose(fp);
928 return(retval);
929 }
816 } 930 }
817 fclose(fp); 931 fclose(fp);
818 return cpu_count; 932 return 0;
819} 933}
820 934
821void re_initialize(void) 935void re_initialize(void)
822{ 936{
823 free_all_counters(); 937 free_all_buffers();
824 num_cpus = for_all_cpus(alloc_new_counters); 938 setup_all_buffers();
825 cpu_mask_uninit(); 939 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
826 cpu_mask_init(num_cpus);
827 printf("turbostat: re-initialized with num_cpus %d\n", num_cpus);
828} 940}
829 941
830void dummy(int pkg, int core, int cpu) { return; } 942
831/* 943/*
832 * check to see if a cpu came on-line 944 * count_cpus()
945 * remember the last one seen, it will be the max
833 */ 946 */
834int verify_num_cpus(void) 947int count_cpus(int cpu)
835{ 948{
836 int new_num_cpus; 949 if (topo.max_cpu_num < cpu)
837 950 topo.max_cpu_num = cpu;
838 new_num_cpus = for_all_cpus(dummy);
839 951
840 if (new_num_cpus != num_cpus) { 952 topo.num_cpus += 1;
841 if (verbose) 953 return 0;
842 printf("num_cpus was %d, is now %d\n", 954}
843 num_cpus, new_num_cpus); 955int mark_cpu_present(int cpu)
844 return -1; 956{
845 } 957 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
846 return 0; 958 return 0;
847} 959}
848 960
849void turbostat_loop() 961void turbostat_loop()
850{ 962{
963 int retval;
964
851restart: 965restart:
852 get_counters(cnt_even); 966 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
967 if (retval) {
968 re_initialize();
969 goto restart;
970 }
853 gettimeofday(&tv_even, (struct timezone *)NULL); 971 gettimeofday(&tv_even, (struct timezone *)NULL);
854 972
855 while (1) { 973 while (1) {
856 if (verify_num_cpus()) { 974 if (for_all_proc_cpus(cpu_is_not_present)) {
857 re_initialize(); 975 re_initialize();
858 goto restart; 976 goto restart;
859 } 977 }
860 sleep(interval_sec); 978 sleep(interval_sec);
861 if (get_counters(cnt_odd)) { 979 retval = for_all_cpus(get_counters, ODD_COUNTERS);
980 if (retval) {
862 re_initialize(); 981 re_initialize();
863 goto restart; 982 goto restart;
864 } 983 }
865 gettimeofday(&tv_odd, (struct timezone *)NULL); 984 gettimeofday(&tv_odd, (struct timezone *)NULL);
866 compute_delta(cnt_odd, cnt_even, cnt_delta);
867 timersub(&tv_odd, &tv_even, &tv_delta); 985 timersub(&tv_odd, &tv_even, &tv_delta);
868 compute_average(cnt_delta, cnt_average); 986 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
869 print_counters(cnt_delta); 987 compute_average(EVEN_COUNTERS);
988 format_all_counters(EVEN_COUNTERS);
989 flush_stdout();
870 sleep(interval_sec); 990 sleep(interval_sec);
871 if (get_counters(cnt_even)) { 991 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
992 if (retval) {
872 re_initialize(); 993 re_initialize();
873 goto restart; 994 goto restart;
874 } 995 }
875 gettimeofday(&tv_even, (struct timezone *)NULL); 996 gettimeofday(&tv_even, (struct timezone *)NULL);
876 compute_delta(cnt_even, cnt_odd, cnt_delta);
877 timersub(&tv_even, &tv_odd, &tv_delta); 997 timersub(&tv_even, &tv_odd, &tv_delta);
878 compute_average(cnt_delta, cnt_average); 998 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
879 print_counters(cnt_delta); 999 compute_average(ODD_COUNTERS);
1000 format_all_counters(ODD_COUNTERS);
1001 flush_stdout();
880 } 1002 }
881} 1003}
882 1004
@@ -1051,6 +1173,208 @@ int open_dev_cpu_msr(int dummy1)
1051 return 0; 1173 return 0;
1052} 1174}
1053 1175
1176void topology_probe()
1177{
1178 int i;
1179 int max_core_id = 0;
1180 int max_package_id = 0;
1181 int max_siblings = 0;
1182 struct cpu_topology {
1183 int core_id;
1184 int physical_package_id;
1185 } *cpus;
1186
1187 /* Initialize num_cpus, max_cpu_num */
1188 topo.num_cpus = 0;
1189 topo.max_cpu_num = 0;
1190 for_all_proc_cpus(count_cpus);
1191 if (!summary_only && topo.num_cpus > 1)
1192 show_cpu = 1;
1193
1194 if (verbose > 1)
1195 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
1196
1197 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
1198 if (cpus == NULL) {
1199 perror("calloc cpus");
1200 exit(1);
1201 }
1202
1203 /*
1204 * Allocate and initialize cpu_present_set
1205 */
1206 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
1207 if (cpu_present_set == NULL) {
1208 perror("CPU_ALLOC");
1209 exit(3);
1210 }
1211 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
1212 CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
1213 for_all_proc_cpus(mark_cpu_present);
1214
1215 /*
1216 * Allocate and initialize cpu_affinity_set
1217 */
1218 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
1219 if (cpu_affinity_set == NULL) {
1220 perror("CPU_ALLOC");
1221 exit(3);
1222 }
1223 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
1224 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
1225
1226
1227 /*
1228 * For online cpus
1229 * find max_core_id, max_package_id
1230 */
1231 for (i = 0; i <= topo.max_cpu_num; ++i) {
1232 int siblings;
1233
1234 if (cpu_is_not_present(i)) {
1235 if (verbose > 1)
1236 fprintf(stderr, "cpu%d NOT PRESENT\n", i);
1237 continue;
1238 }
1239 cpus[i].core_id = get_core_id(i);
1240 if (cpus[i].core_id > max_core_id)
1241 max_core_id = cpus[i].core_id;
1242
1243 cpus[i].physical_package_id = get_physical_package_id(i);
1244 if (cpus[i].physical_package_id > max_package_id)
1245 max_package_id = cpus[i].physical_package_id;
1246
1247 siblings = get_num_ht_siblings(i);
1248 if (siblings > max_siblings)
1249 max_siblings = siblings;
1250 if (verbose > 1)
1251 fprintf(stderr, "cpu %d pkg %d core %d\n",
1252 i, cpus[i].physical_package_id, cpus[i].core_id);
1253 }
1254 topo.num_cores_per_pkg = max_core_id + 1;
1255 if (verbose > 1)
1256 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
1257 max_core_id, topo.num_cores_per_pkg);
1258 if (!summary_only && topo.num_cores_per_pkg > 1)
1259 show_core = 1;
1260
1261 topo.num_packages = max_package_id + 1;
1262 if (verbose > 1)
1263 fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
1264 max_package_id, topo.num_packages);
1265 if (!summary_only && topo.num_packages > 1)
1266 show_pkg = 1;
1267
1268 topo.num_threads_per_core = max_siblings;
1269 if (verbose > 1)
1270 fprintf(stderr, "max_siblings %d\n", max_siblings);
1271
1272 free(cpus);
1273}
1274
1275void
1276allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
1277{
1278 int i;
1279
1280 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
1281 topo.num_packages, sizeof(struct thread_data));
1282 if (*t == NULL)
1283 goto error;
1284
1285 for (i = 0; i < topo.num_threads_per_core *
1286 topo.num_cores_per_pkg * topo.num_packages; i++)
1287 (*t)[i].cpu_id = -1;
1288
1289 *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
1290 sizeof(struct core_data));
1291 if (*c == NULL)
1292 goto error;
1293
1294 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
1295 (*c)[i].core_id = -1;
1296
1297 *p = calloc(topo.num_packages, sizeof(struct pkg_data));
1298 if (*p == NULL)
1299 goto error;
1300
1301 for (i = 0; i < topo.num_packages; i++)
1302 (*p)[i].package_id = i;
1303
1304 return;
1305error:
1306 perror("calloc counters");
1307 exit(1);
1308}
1309/*
1310 * init_counter()
1311 *
1312 * set cpu_id, core_num, pkg_num
1313 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
1314 *
1315 * increment topo.num_cores when 1st core in pkg seen
1316 */
1317void init_counter(struct thread_data *thread_base, struct core_data *core_base,
1318 struct pkg_data *pkg_base, int thread_num, int core_num,
1319 int pkg_num, int cpu_id)
1320{
1321 struct thread_data *t;
1322 struct core_data *c;
1323 struct pkg_data *p;
1324
1325 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
1326 c = GET_CORE(core_base, core_num, pkg_num);
1327 p = GET_PKG(pkg_base, pkg_num);
1328
1329 t->cpu_id = cpu_id;
1330 if (thread_num == 0) {
1331 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
1332 if (cpu_is_first_core_in_package(cpu_id))
1333 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
1334 }
1335
1336 c->core_id = core_num;
1337 p->package_id = pkg_num;
1338}
1339
1340
1341int initialize_counters(int cpu_id)
1342{
1343 int my_thread_id, my_core_id, my_package_id;
1344
1345 my_package_id = get_physical_package_id(cpu_id);
1346 my_core_id = get_core_id(cpu_id);
1347
1348 if (cpu_is_first_sibling_in_core(cpu_id)) {
1349 my_thread_id = 0;
1350 topo.num_cores++;
1351 } else {
1352 my_thread_id = 1;
1353 }
1354
1355 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
1356 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
1357 return 0;
1358}
1359
1360void allocate_output_buffer()
1361{
1362 output_buffer = calloc(1, (1 + topo.num_cpus) * 128);
1363 outp = output_buffer;
1364 if (outp == NULL) {
1365 perror("calloc");
1366 exit(-1);
1367 }
1368}
1369
1370void setup_all_buffers(void)
1371{
1372 topology_probe();
1373 allocate_counters(&thread_even, &core_even, &package_even);
1374 allocate_counters(&thread_odd, &core_odd, &package_odd);
1375 allocate_output_buffer();
1376 for_all_proc_cpus(initialize_counters);
1377}
1054void turbostat_init() 1378void turbostat_init()
1055{ 1379{
1056 check_cpuid(); 1380 check_cpuid();
@@ -1058,21 +1382,19 @@ void turbostat_init()
1058 check_dev_msr(); 1382 check_dev_msr();
1059 check_super_user(); 1383 check_super_user();
1060 1384
1061 num_cpus = for_all_cpus(alloc_new_counters); 1385 setup_all_buffers();
1062 cpu_mask_init(num_cpus);
1063 1386
1064 if (verbose) 1387 if (verbose)
1065 print_nehalem_info(); 1388 print_verbose_header();
1066} 1389}
1067 1390
1068int fork_it(char **argv) 1391int fork_it(char **argv)
1069{ 1392{
1070 int retval;
1071 pid_t child_pid; 1393 pid_t child_pid;
1072 get_counters(cnt_even);
1073 1394
1074 /* clear affinity side-effect of get_counters() */ 1395 for_all_cpus(get_counters, EVEN_COUNTERS);
1075 sched_setaffinity(0, cpu_present_setsize, cpu_present_set); 1396 /* clear affinity side-effect of get_counters() */
1397 sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
1076 gettimeofday(&tv_even, (struct timezone *)NULL); 1398 gettimeofday(&tv_even, (struct timezone *)NULL);
1077 1399
1078 child_pid = fork(); 1400 child_pid = fork();
@@ -1095,14 +1417,17 @@ int fork_it(char **argv)
1095 exit(1); 1417 exit(1);
1096 } 1418 }
1097 } 1419 }
1098 get_counters(cnt_odd); 1420 /*
1421 * n.b. fork_it() does not check for errors from for_all_cpus()
1422 * because re-starting is problematic when forking
1423 */
1424 for_all_cpus(get_counters, ODD_COUNTERS);
1099 gettimeofday(&tv_odd, (struct timezone *)NULL); 1425 gettimeofday(&tv_odd, (struct timezone *)NULL);
1100 retval = compute_delta(cnt_odd, cnt_even, cnt_delta);
1101
1102 timersub(&tv_odd, &tv_even, &tv_delta); 1426 timersub(&tv_odd, &tv_even, &tv_delta);
1103 compute_average(cnt_delta, cnt_average); 1427 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
1104 if (!retval) 1428 compute_average(EVEN_COUNTERS);
1105 print_counters(cnt_delta); 1429 format_all_counters(EVEN_COUNTERS);
1430 flush_stderr();
1106 1431
1107 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 1432 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
1108 1433
@@ -1115,8 +1440,14 @@ void cmdline(int argc, char **argv)
1115 1440
1116 progname = argv[0]; 1441 progname = argv[0];
1117 1442
1118 while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { 1443 while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) {
1119 switch (opt) { 1444 switch (opt) {
1445 case 'c':
1446 show_core_only++;
1447 break;
1448 case 'p':
1449 show_pkg_only++;
1450 break;
1120 case 's': 1451 case 's':
1121 summary_only++; 1452 summary_only++;
1122 break; 1453 break;
@@ -1142,10 +1473,8 @@ int main(int argc, char **argv)
1142 cmdline(argc, argv); 1473 cmdline(argc, argv);
1143 1474
1144 if (verbose > 1) 1475 if (verbose > 1)
1145 fprintf(stderr, "turbostat Dec 6, 2010" 1476 fprintf(stderr, "turbostat v2.0 May 16, 2012"
1146 " - Len Brown <lenb@kernel.org>\n"); 1477 " - Len Brown <lenb@kernel.org>\n");
1147 if (verbose > 1)
1148 fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
1149 1478
1150 turbostat_init(); 1479 turbostat_init();
1151 1480