diff options
author | Len Brown <len.brown@intel.com> | 2012-06-04 00:56:40 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2012-07-19 22:26:14 -0400 |
commit | c98d5d9444732a032bc55d1a496bfa8439da9199 (patch) | |
tree | a6d3ba319f3c575c21f8bb7cc757c928135b6cbd /tools | |
parent | d3514abcf5b896a3a66d8b7c960a0018a52ebc2c (diff) |
tools/power: turbostat v2 - re-write for efficiency
Measuring large profoundly-idle configurations
requires turbostat to be more lightweight.
Otherwise, the operation of turbostat itself
can interfere with the measurements.
This re-write makes turbostat topology aware.
Hardware is accessed in "topology order".
Redundant hardware accesses are deleted.
Redundant output is deleted.
Also, output is buffered, and a local RDTSC instruction
replaces remote MSR access when reading the TSC.
From a feature point of view, the output
looks different since redundant figures are absent.
Also, there are now -c and -p options -- to restrict
output to the 1st thread in each core, and the 1st
thread in each package, respectively. This is helpful
to reduce output on big systems, where more detail
than the "-s" system summary is desired.
Finally, periodic mode output is now on stdout, not stderr.
Turbostat v2 is also slightly more robust in
handling run-time CPU online/offline events,
as it now checks the actual map of on-line cpus rather
than just the total number of on-line cpus.
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 1 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 77 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1329 |
3 files changed, 868 insertions, 539 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index fd8e1f1297aa..f85649554191 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | turbostat : turbostat.c | 1 | turbostat : turbostat.c |
2 | CFLAGS += -Wall | ||
2 | 3 | ||
3 | clean : | 4 | clean : |
4 | rm -f turbostat | 5 | rm -f turbostat |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index adf175f61496..74e44507dfe9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -27,7 +27,11 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. | |||
27 | on processors that additionally support C-state residency counters. | 27 | on processors that additionally support C-state residency counters. |
28 | 28 | ||
29 | .SS Options | 29 | .SS Options |
30 | The \fB-s\fP option prints only a 1-line summary for each sample interval. | 30 | The \fB-s\fP option limits output to a 1-line system summary for each interval. |
31 | .PP | ||
32 | The \fB-c\fP option limits output to the 1st thread in each core. | ||
33 | .PP | ||
34 | The \fB-p\fP option limits output to the 1st thread in each package. | ||
31 | .PP | 35 | .PP |
32 | The \fB-v\fP option increases verbosity. | 36 | The \fB-v\fP option increases verbosity. |
33 | .PP | 37 | .PP |
@@ -65,19 +69,19 @@ Subsequent rows show per-CPU statistics. | |||
65 | .nf | 69 | .nf |
66 | [root@x980]# ./turbostat | 70 | [root@x980]# ./turbostat |
67 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 71 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
68 | 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 | 72 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 |
69 | 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 | 73 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 |
70 | 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 | 74 | 0 6 0.05 1.62 3.38 10.34 |
71 | 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 | 75 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 |
72 | 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 | 76 | 1 8 0.03 1.62 3.38 0.06 |
73 | 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 | 77 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 |
74 | 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 | 78 | 2 10 0.02 1.62 3.38 0.29 |
75 | 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 | 79 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 |
76 | 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 | 80 | 8 7 0.01 1.62 3.38 0.06 |
77 | 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 | 81 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 |
78 | 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 | 82 | 9 9 0.02 1.62 3.38 0.60 |
79 | 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 | 83 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 |
80 | 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 | 84 | 10 11 0.02 1.62 3.38 0.02 |
81 | .fi | 85 | .fi |
82 | .SH SUMMARY EXAMPLE | 86 | .SH SUMMARY EXAMPLE |
83 | The "-s" option prints the column headers just once, | 87 | The "-s" option prints the column headers just once, |
@@ -86,9 +90,10 @@ and then the one line system summary for each sample interval. | |||
86 | .nf | 90 | .nf |
87 | [root@x980]# ./turbostat -s | 91 | [root@x980]# ./turbostat -s |
88 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 92 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
89 | 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 | 93 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 |
90 | 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 | 94 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 |
91 | 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 | 95 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 |
96 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | ||
92 | .fi | 97 | .fi |
93 | .SH VERBOSE EXAMPLE | 98 | .SH VERBOSE EXAMPLE |
94 | The "-v" option adds verbosity to the output: | 99 | The "-v" option adds verbosity to the output: |
@@ -120,30 +125,28 @@ until ^C while the other CPUs are mostly idle: | |||
120 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null | 125 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null |
121 | ^C | 126 | ^C |
122 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 127 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
123 | 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 | 128 | 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00 |
124 | 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 | 129 | 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00 |
125 | 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 | 130 | 0 6 0.21 3.06 3.38 18.09 |
126 | 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 | 131 | 1 2 0.53 3.33 3.38 2.80 46.40 50.27 |
127 | 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 | 132 | 1 8 0.89 3.47 3.38 2.44 |
128 | 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 | 133 | 2 4 1.36 3.43 3.38 9.04 23.71 65.89 |
129 | 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 | 134 | 2 10 0.18 2.86 3.38 10.22 |
130 | 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 | 135 | 8 1 0.04 2.87 3.38 99.96 0.01 0.00 |
131 | 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 | 136 | 8 7 99.72 3.63 3.38 0.27 |
132 | 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 | 137 | 9 3 0.31 3.21 3.38 7.64 56.55 35.50 |
133 | 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 | 138 | 9 9 0.08 2.95 3.38 7.88 |
134 | 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 | 139 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
135 | 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 | 140 | 10 11 0.16 2.88 3.38 3.40 |
136 | 4.907015 sec | ||
137 | |||
138 | .fi | 141 | .fi |
139 | Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit | 142 | Above the cycle soaker drives cpu7 up to its 3.6 GHz turbo limit |
140 | while the other processors are generally in various states of idle. | 143 | while the other processors are generally in various states of idle. |
141 | 144 | ||
142 | Note that cpu0 is an HT sibling sharing core0 | 145 | Note that cpu1 and cpu7 are HT siblings within core8. |
143 | with cpu6, and thus it is unable to get to an idle state | 146 | As cpu7 is very busy, it prevents its sibling, cpu1, |
144 | deeper than c1 while cpu6 is busy. | 147 | from entering a c-state deeper than c1. |
145 | 148 | ||
146 | Note that turbostat reports average GHz of 3.64, while | 149 | Note that turbostat reports average GHz of 3.63, while |
147 | the arithmetic average of the GHz column above is lower. | 150 | the arithmetic average of the GHz column above is lower. |
148 | This is a weighted average, where the weight is %c0. ie. it is the total number of | 151 | This is a weighted average, where the weight is %c0. ie. it is the total number of |
149 | un-halted cycles elapsed per time divided by the number of CPUs. | 152 | un-halted cycles elapsed per time divided by the number of CPUs. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 16de7ad4850f..b815a12159b2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -67,92 +67,119 @@ double bclk; | |||
67 | unsigned int show_pkg; | 67 | unsigned int show_pkg; |
68 | unsigned int show_core; | 68 | unsigned int show_core; |
69 | unsigned int show_cpu; | 69 | unsigned int show_cpu; |
70 | unsigned int show_pkg_only; | ||
71 | unsigned int show_core_only; | ||
72 | char *output_buffer, *outp; | ||
70 | 73 | ||
71 | int aperf_mperf_unstable; | 74 | int aperf_mperf_unstable; |
72 | int backwards_count; | 75 | int backwards_count; |
73 | char *progname; | 76 | char *progname; |
74 | 77 | ||
75 | int num_cpus; | 78 | cpu_set_t *cpu_present_set, *cpu_affinity_set; |
76 | cpu_set_t *cpu_present_set, *cpu_mask; | 79 | size_t cpu_present_setsize, cpu_affinity_setsize; |
77 | size_t cpu_present_setsize, cpu_mask_size; | 80 | |
78 | 81 | struct thread_data { | |
79 | struct counters { | 82 | unsigned long long tsc; |
80 | unsigned long long tsc; /* per thread */ | 83 | unsigned long long aperf; |
81 | unsigned long long aperf; /* per thread */ | 84 | unsigned long long mperf; |
82 | unsigned long long mperf; /* per thread */ | 85 | unsigned long long c1; /* derived */ |
83 | unsigned long long c1; /* per thread (calculated) */ | 86 | unsigned long long extra_msr; |
84 | unsigned long long c3; /* per core */ | 87 | unsigned int cpu_id; |
85 | unsigned long long c6; /* per core */ | 88 | unsigned int flags; |
86 | unsigned long long c7; /* per core */ | 89 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 |
87 | unsigned long long pc2; /* per package */ | 90 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 |
88 | unsigned long long pc3; /* per package */ | 91 | } *thread_even, *thread_odd; |
89 | unsigned long long pc6; /* per package */ | 92 | |
90 | unsigned long long pc7; /* per package */ | 93 | struct core_data { |
91 | unsigned long long extra_msr; /* per thread */ | 94 | unsigned long long c3; |
92 | int pkg; | 95 | unsigned long long c6; |
93 | int core; | 96 | unsigned long long c7; |
94 | int cpu; | 97 | unsigned int core_id; |
95 | struct counters *next; | 98 | } *core_even, *core_odd; |
96 | }; | 99 | |
97 | 100 | struct pkg_data { | |
98 | struct counters *cnt_even; | 101 | unsigned long long pc2; |
99 | struct counters *cnt_odd; | 102 | unsigned long long pc3; |
100 | struct counters *cnt_delta; | 103 | unsigned long long pc6; |
101 | struct counters *cnt_average; | 104 | unsigned long long pc7; |
102 | struct timeval tv_even; | 105 | unsigned int package_id; |
103 | struct timeval tv_odd; | 106 | } *package_even, *package_odd; |
104 | struct timeval tv_delta; | 107 | |
105 | 108 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | |
106 | int mark_cpu_present(int pkg, int core, int cpu) | 109 | #define EVEN_COUNTERS thread_even, core_even, package_even |
110 | |||
111 | #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ | ||
112 | (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ | ||
113 | topo.num_threads_per_core + \ | ||
114 | (core_no) * topo.num_threads_per_core + (thread_no)) | ||
115 | #define GET_CORE(core_base, core_no, pkg_no) \ | ||
116 | (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) | ||
117 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) | ||
118 | |||
119 | struct system_summary { | ||
120 | struct thread_data threads; | ||
121 | struct core_data cores; | ||
122 | struct pkg_data packages; | ||
123 | } sum, average; | ||
124 | |||
125 | |||
126 | struct topo_params { | ||
127 | int num_packages; | ||
128 | int num_cpus; | ||
129 | int num_cores; | ||
130 | int max_cpu_num; | ||
131 | int num_cores_per_pkg; | ||
132 | int num_threads_per_core; | ||
133 | } topo; | ||
134 | |||
135 | struct timeval tv_even, tv_odd, tv_delta; | ||
136 | |||
137 | void setup_all_buffers(void); | ||
138 | |||
139 | int cpu_is_not_present(int cpu) | ||
107 | { | 140 | { |
108 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); | 141 | return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); |
109 | return 0; | ||
110 | } | 142 | } |
111 | |||
112 | /* | 143 | /* |
113 | * cpu_mask_init(ncpus) | 144 | * run func(thread, core, package) in topology order |
114 | * | 145 | * skip non-present cpus |
115 | * allocate and clear cpu_mask | ||
116 | * set cpu_mask_size | ||
117 | */ | 146 | */ |
118 | void cpu_mask_init(int ncpus) | 147 | |
148 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), | ||
149 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) | ||
119 | { | 150 | { |
120 | cpu_mask = CPU_ALLOC(ncpus); | 151 | int retval, pkg_no, core_no, thread_no; |
121 | if (cpu_mask == NULL) { | ||
122 | perror("CPU_ALLOC"); | ||
123 | exit(3); | ||
124 | } | ||
125 | cpu_mask_size = CPU_ALLOC_SIZE(ncpus); | ||
126 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
127 | 152 | ||
128 | /* | 153 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
129 | * Allocate and initialize cpu_present_set | 154 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { |
130 | */ | 155 | for (thread_no = 0; thread_no < |
131 | cpu_present_set = CPU_ALLOC(ncpus); | 156 | topo.num_threads_per_core; ++thread_no) { |
132 | if (cpu_present_set == NULL) { | 157 | struct thread_data *t; |
133 | perror("CPU_ALLOC"); | 158 | struct core_data *c; |
134 | exit(3); | 159 | struct pkg_data *p; |
135 | } | ||
136 | cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); | ||
137 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
138 | for_all_cpus(mark_cpu_present); | ||
139 | } | ||
140 | 160 | ||
141 | void cpu_mask_uninit() | 161 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); |
142 | { | 162 | |
143 | CPU_FREE(cpu_mask); | 163 | if (cpu_is_not_present(t->cpu_id)) |
144 | cpu_mask = NULL; | 164 | continue; |
145 | cpu_mask_size = 0; | 165 | |
146 | CPU_FREE(cpu_present_set); | 166 | c = GET_CORE(core_base, core_no, pkg_no); |
147 | cpu_present_set = NULL; | 167 | p = GET_PKG(pkg_base, pkg_no); |
148 | cpu_present_setsize = 0; | 168 | |
169 | retval = func(t, c, p); | ||
170 | if (retval) | ||
171 | return retval; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | return 0; | ||
149 | } | 176 | } |
150 | 177 | ||
151 | int cpu_migrate(int cpu) | 178 | int cpu_migrate(int cpu) |
152 | { | 179 | { |
153 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | 180 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); |
154 | CPU_SET_S(cpu, cpu_mask_size, cpu_mask); | 181 | CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); |
155 | if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) | 182 | if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) |
156 | return -1; | 183 | return -1; |
157 | else | 184 | else |
158 | return 0; | 185 | return 0; |
@@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
181 | void print_header(void) | 208 | void print_header(void) |
182 | { | 209 | { |
183 | if (show_pkg) | 210 | if (show_pkg) |
184 | fprintf(stderr, "pk"); | 211 | outp += sprintf(outp, "pk"); |
185 | if (show_pkg) | 212 | if (show_pkg) |
186 | fprintf(stderr, " "); | 213 | outp += sprintf(outp, " "); |
187 | if (show_core) | 214 | if (show_core) |
188 | fprintf(stderr, "cor"); | 215 | outp += sprintf(outp, "cor"); |
189 | if (show_cpu) | 216 | if (show_cpu) |
190 | fprintf(stderr, " CPU"); | 217 | outp += sprintf(outp, " CPU"); |
191 | if (show_pkg || show_core || show_cpu) | 218 | if (show_pkg || show_core || show_cpu) |
192 | fprintf(stderr, " "); | 219 | outp += sprintf(outp, " "); |
193 | if (do_nhm_cstates) | 220 | if (do_nhm_cstates) |
194 | fprintf(stderr, " %%c0"); | 221 | outp += sprintf(outp, " %%c0"); |
195 | if (has_aperf) | 222 | if (has_aperf) |
196 | fprintf(stderr, " GHz"); | 223 | outp += sprintf(outp, " GHz"); |
197 | fprintf(stderr, " TSC"); | 224 | outp += sprintf(outp, " TSC"); |
198 | if (do_nhm_cstates) | 225 | if (do_nhm_cstates) |
199 | fprintf(stderr, " %%c1"); | 226 | outp += sprintf(outp, " %%c1"); |
200 | if (do_nhm_cstates) | 227 | if (do_nhm_cstates) |
201 | fprintf(stderr, " %%c3"); | 228 | outp += sprintf(outp, " %%c3"); |
202 | if (do_nhm_cstates) | 229 | if (do_nhm_cstates) |
203 | fprintf(stderr, " %%c6"); | 230 | outp += sprintf(outp, " %%c6"); |
204 | if (do_snb_cstates) | 231 | if (do_snb_cstates) |
205 | fprintf(stderr, " %%c7"); | 232 | outp += sprintf(outp, " %%c7"); |
206 | if (do_snb_cstates) | 233 | if (do_snb_cstates) |
207 | fprintf(stderr, " %%pc2"); | 234 | outp += sprintf(outp, " %%pc2"); |
208 | if (do_nhm_cstates) | 235 | if (do_nhm_cstates) |
209 | fprintf(stderr, " %%pc3"); | 236 | outp += sprintf(outp, " %%pc3"); |
210 | if (do_nhm_cstates) | 237 | if (do_nhm_cstates) |
211 | fprintf(stderr, " %%pc6"); | 238 | outp += sprintf(outp, " %%pc6"); |
212 | if (do_snb_cstates) | 239 | if (do_snb_cstates) |
213 | fprintf(stderr, " %%pc7"); | 240 | outp += sprintf(outp, " %%pc7"); |
214 | if (extra_msr_offset) | 241 | if (extra_msr_offset) |
215 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); | 242 | outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); |
216 | 243 | ||
217 | putc('\n', stderr); | 244 | outp += sprintf(outp, "\n"); |
218 | } | 245 | } |
219 | 246 | ||
220 | void dump_cnt(struct counters *cnt) | 247 | int dump_counters(struct thread_data *t, struct core_data *c, |
248 | struct pkg_data *p) | ||
221 | { | 249 | { |
222 | if (!cnt) | 250 | fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); |
223 | return; | 251 | |
224 | if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); | 252 | if (t) { |
225 | if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); | 253 | fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); |
226 | if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); | 254 | fprintf(stderr, "TSC: %016llX\n", t->tsc); |
227 | if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); | 255 | fprintf(stderr, "aperf: %016llX\n", t->aperf); |
228 | if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); | 256 | fprintf(stderr, "mperf: %016llX\n", t->mperf); |
229 | if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); | 257 | fprintf(stderr, "c1: %016llX\n", t->c1); |
230 | if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); | 258 | fprintf(stderr, "msr0x%x: %016llX\n", |
231 | if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); | 259 | extra_msr_offset, t->extra_msr); |
232 | if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); | 260 | } |
233 | if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3); | ||
234 | if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6); | ||
235 | if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7); | ||
236 | if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); | ||
237 | } | ||
238 | 261 | ||
239 | void dump_list(struct counters *cnt) | 262 | if (c) { |
240 | { | 263 | fprintf(stderr, "core: %d\n", c->core_id); |
241 | printf("dump_list 0x%p\n", cnt); | 264 | fprintf(stderr, "c3: %016llX\n", c->c3); |
265 | fprintf(stderr, "c6: %016llX\n", c->c6); | ||
266 | fprintf(stderr, "c7: %016llX\n", c->c7); | ||
267 | } | ||
242 | 268 | ||
243 | for (; cnt; cnt = cnt->next) | 269 | if (p) { |
244 | dump_cnt(cnt); | 270 | fprintf(stderr, "package: %d\n", p->package_id); |
271 | fprintf(stderr, "pc2: %016llX\n", p->pc2); | ||
272 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | ||
273 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | ||
274 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | ||
275 | } | ||
276 | return 0; | ||
245 | } | 277 | } |
246 | 278 | ||
247 | /* | 279 | /* |
@@ -253,321 +285,385 @@ void dump_list(struct counters *cnt) | |||
253 | * TSC: "TSC" 3 columns %3.2 | 285 | * TSC: "TSC" 3 columns %3.2 |
254 | * percentage " %pc3" %6.2 | 286 | * percentage " %pc3" %6.2 |
255 | */ | 287 | */ |
256 | void print_cnt(struct counters *p) | 288 | int format_counters(struct thread_data *t, struct core_data *c, |
289 | struct pkg_data *p) | ||
257 | { | 290 | { |
258 | double interval_float; | 291 | double interval_float; |
259 | 292 | ||
293 | /* if showing only 1st thread in core and this isn't one, bail out */ | ||
294 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
295 | return 0; | ||
296 | |||
297 | /* if showing only 1st thread in pkg and this isn't one, bail out */ | ||
298 | if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
299 | return 0; | ||
300 | |||
260 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; | 301 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; |
261 | 302 | ||
262 | /* topology columns, print blanks on 1st (average) line */ | 303 | /* topo columns, print blanks on 1st (average) line */ |
263 | if (p == cnt_average) { | 304 | if (t == &average.threads) { |
264 | if (show_pkg) | 305 | if (show_pkg) |
265 | fprintf(stderr, " "); | 306 | outp += sprintf(outp, " "); |
266 | if (show_pkg && show_core) | 307 | if (show_pkg && show_core) |
267 | fprintf(stderr, " "); | 308 | outp += sprintf(outp, " "); |
268 | if (show_core) | 309 | if (show_core) |
269 | fprintf(stderr, " "); | 310 | outp += sprintf(outp, " "); |
270 | if (show_cpu) | 311 | if (show_cpu) |
271 | fprintf(stderr, " " " "); | 312 | outp += sprintf(outp, " " " "); |
272 | } else { | 313 | } else { |
273 | if (show_pkg) | 314 | if (show_pkg) { |
274 | fprintf(stderr, "%2d", p->pkg); | 315 | if (p) |
316 | outp += sprintf(outp, "%2d", p->package_id); | ||
317 | else | ||
318 | outp += sprintf(outp, " "); | ||
319 | } | ||
275 | if (show_pkg && show_core) | 320 | if (show_pkg && show_core) |
276 | fprintf(stderr, " "); | 321 | outp += sprintf(outp, " "); |
277 | if (show_core) | 322 | if (show_core) { |
278 | fprintf(stderr, "%3d", p->core); | 323 | if (c) |
324 | outp += sprintf(outp, "%3d", c->core_id); | ||
325 | else | ||
326 | outp += sprintf(outp, " "); | ||
327 | } | ||
279 | if (show_cpu) | 328 | if (show_cpu) |
280 | fprintf(stderr, " %3d", p->cpu); | 329 | outp += sprintf(outp, " %3d", t->cpu_id); |
281 | } | 330 | } |
282 | 331 | ||
283 | /* %c0 */ | 332 | /* %c0 */ |
284 | if (do_nhm_cstates) { | 333 | if (do_nhm_cstates) { |
285 | if (show_pkg || show_core || show_cpu) | 334 | if (show_pkg || show_core || show_cpu) |
286 | fprintf(stderr, " "); | 335 | outp += sprintf(outp, " "); |
287 | if (!skip_c0) | 336 | if (!skip_c0) |
288 | fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); | 337 | outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); |
289 | else | 338 | else |
290 | fprintf(stderr, " ****"); | 339 | outp += sprintf(outp, " ****"); |
291 | } | 340 | } |
292 | 341 | ||
293 | /* GHz */ | 342 | /* GHz */ |
294 | if (has_aperf) { | 343 | if (has_aperf) { |
295 | if (!aperf_mperf_unstable) { | 344 | if (!aperf_mperf_unstable) { |
296 | fprintf(stderr, " %3.2f", | 345 | outp += sprintf(outp, " %3.2f", |
297 | 1.0 * p->tsc / units * p->aperf / | 346 | 1.0 * t->tsc / units * t->aperf / |
298 | p->mperf / interval_float); | 347 | t->mperf / interval_float); |
299 | } else { | 348 | } else { |
300 | if (p->aperf > p->tsc || p->mperf > p->tsc) { | 349 | if (t->aperf > t->tsc || t->mperf > t->tsc) { |
301 | fprintf(stderr, " ***"); | 350 | outp += sprintf(outp, " ***"); |
302 | } else { | 351 | } else { |
303 | fprintf(stderr, "%3.1f*", | 352 | outp += sprintf(outp, "%3.1f*", |
304 | 1.0 * p->tsc / | 353 | 1.0 * t->tsc / |
305 | units * p->aperf / | 354 | units * t->aperf / |
306 | p->mperf / interval_float); | 355 | t->mperf / interval_float); |
307 | } | 356 | } |
308 | } | 357 | } |
309 | } | 358 | } |
310 | 359 | ||
311 | /* TSC */ | 360 | /* TSC */ |
312 | fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); | 361 | outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); |
313 | 362 | ||
314 | if (do_nhm_cstates) { | 363 | if (do_nhm_cstates) { |
315 | if (!skip_c1) | 364 | if (!skip_c1) |
316 | fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); | 365 | outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); |
317 | else | 366 | else |
318 | fprintf(stderr, " ****"); | 367 | outp += sprintf(outp, " ****"); |
319 | } | 368 | } |
369 | |||
370 | /* print per-core data only for 1st thread in core */ | ||
371 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
372 | goto done; | ||
373 | |||
320 | if (do_nhm_cstates) | 374 | if (do_nhm_cstates) |
321 | fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); | 375 | outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); |
322 | if (do_nhm_cstates) | 376 | if (do_nhm_cstates) |
323 | fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); | 377 | outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); |
324 | if (do_snb_cstates) | 378 | if (do_snb_cstates) |
325 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); | 379 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
380 | |||
381 | /* print per-package data only for 1st core in package */ | ||
382 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
383 | goto done; | ||
384 | |||
326 | if (do_snb_cstates) | 385 | if (do_snb_cstates) |
327 | fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); | 386 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
328 | if (do_nhm_cstates) | 387 | if (do_nhm_cstates) |
329 | fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); | 388 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); |
330 | if (do_nhm_cstates) | 389 | if (do_nhm_cstates) |
331 | fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); | 390 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
332 | if (do_snb_cstates) | 391 | if (do_snb_cstates) |
333 | fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); | 392 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
393 | done: | ||
334 | if (extra_msr_offset) | 394 | if (extra_msr_offset) |
335 | fprintf(stderr, " 0x%016llx", p->extra_msr); | 395 | outp += sprintf(outp, " 0x%016llx", t->extra_msr); |
336 | putc('\n', stderr); | 396 | outp += sprintf(outp, "\n"); |
397 | |||
398 | return 0; | ||
337 | } | 399 | } |
338 | 400 | ||
339 | void print_counters(struct counters *counters) | 401 | void flush_stdout() |
402 | { | ||
403 | fputs(output_buffer, stdout); | ||
404 | outp = output_buffer; | ||
405 | } | ||
406 | void flush_stderr() | ||
407 | { | ||
408 | fputs(output_buffer, stderr); | ||
409 | outp = output_buffer; | ||
410 | } | ||
411 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
340 | { | 412 | { |
341 | struct counters *cnt; | ||
342 | static int printed; | 413 | static int printed; |
343 | 414 | ||
344 | |||
345 | if (!printed || !summary_only) | 415 | if (!printed || !summary_only) |
346 | print_header(); | 416 | print_header(); |
347 | 417 | ||
348 | if (num_cpus > 1) | 418 | if (topo.num_cpus > 1) |
349 | print_cnt(cnt_average); | 419 | format_counters(&average.threads, &average.cores, |
420 | &average.packages); | ||
350 | 421 | ||
351 | printed = 1; | 422 | printed = 1; |
352 | 423 | ||
353 | if (summary_only) | 424 | if (summary_only) |
354 | return; | 425 | return; |
355 | 426 | ||
356 | for (cnt = counters; cnt != NULL; cnt = cnt->next) | 427 | for_all_cpus(format_counters, t, c, p); |
357 | print_cnt(cnt); | ||
358 | |||
359 | } | 428 | } |
360 | 429 | ||
361 | #define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) | 430 | void |
431 | delta_package(struct pkg_data *new, struct pkg_data *old) | ||
432 | { | ||
433 | old->pc2 = new->pc2 - old->pc2; | ||
434 | old->pc3 = new->pc3 - old->pc3; | ||
435 | old->pc6 = new->pc6 - old->pc6; | ||
436 | old->pc7 = new->pc7 - old->pc7; | ||
437 | } | ||
362 | 438 | ||
363 | int compute_delta(struct counters *after, | 439 | void |
364 | struct counters *before, struct counters *delta) | 440 | delta_core(struct core_data *new, struct core_data *old) |
365 | { | 441 | { |
366 | int errors = 0; | 442 | old->c3 = new->c3 - old->c3; |
367 | int perf_err = 0; | 443 | old->c6 = new->c6 - old->c6; |
444 | old->c7 = new->c7 - old->c7; | ||
445 | } | ||
368 | 446 | ||
369 | skip_c0 = skip_c1 = 0; | 447 | void |
448 | delta_thread(struct thread_data *new, struct thread_data *old, | ||
449 | struct core_data *core_delta) | ||
450 | { | ||
451 | old->tsc = new->tsc - old->tsc; | ||
452 | |||
453 | /* check for TSC < 1 Mcycles over interval */ | ||
454 | if (old->tsc < (1000 * 1000)) { | ||
455 | fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); | ||
456 | fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); | ||
457 | fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); | ||
458 | exit(-3); | ||
459 | } | ||
370 | 460 | ||
371 | for ( ; after && before && delta; | 461 | old->c1 = new->c1 - old->c1; |
372 | after = after->next, before = before->next, delta = delta->next) { | ||
373 | if (before->cpu != after->cpu) { | ||
374 | printf("cpu configuration changed: %d != %d\n", | ||
375 | before->cpu, after->cpu); | ||
376 | return -1; | ||
377 | } | ||
378 | 462 | ||
379 | if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { | 463 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
380 | fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", | 464 | old->aperf = new->aperf - old->aperf; |
381 | before->cpu, before->tsc, after->tsc); | 465 | old->mperf = new->mperf - old->mperf; |
382 | errors++; | 466 | } else { |
383 | } | ||
384 | /* check for TSC < 1 Mcycles over interval */ | ||
385 | if (delta->tsc < (1000 * 1000)) { | ||
386 | fprintf(stderr, "Insanely slow TSC rate," | ||
387 | " TSC stops in idle?\n"); | ||
388 | fprintf(stderr, "You can disable all c-states" | ||
389 | " by booting with \"idle=poll\"\n"); | ||
390 | fprintf(stderr, "or just the deep ones with" | ||
391 | " \"processor.max_cstate=1\"\n"); | ||
392 | exit(-3); | ||
393 | } | ||
394 | if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { | ||
395 | fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", | ||
396 | before->cpu, before->c3, after->c3); | ||
397 | errors++; | ||
398 | } | ||
399 | if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { | ||
400 | fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", | ||
401 | before->cpu, before->c6, after->c6); | ||
402 | errors++; | ||
403 | } | ||
404 | if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { | ||
405 | fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", | ||
406 | before->cpu, before->c7, after->c7); | ||
407 | errors++; | ||
408 | } | ||
409 | if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { | ||
410 | fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", | ||
411 | before->cpu, before->pc2, after->pc2); | ||
412 | errors++; | ||
413 | } | ||
414 | if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { | ||
415 | fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", | ||
416 | before->cpu, before->pc3, after->pc3); | ||
417 | errors++; | ||
418 | } | ||
419 | if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { | ||
420 | fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", | ||
421 | before->cpu, before->pc6, after->pc6); | ||
422 | errors++; | ||
423 | } | ||
424 | if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { | ||
425 | fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", | ||
426 | before->cpu, before->pc7, after->pc7); | ||
427 | errors++; | ||
428 | } | ||
429 | 467 | ||
430 | perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); | 468 | if (!aperf_mperf_unstable) { |
431 | if (perf_err) { | 469 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
432 | fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", | 470 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
433 | before->cpu, before->aperf, after->aperf); | 471 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
434 | } | ||
435 | perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); | ||
436 | if (perf_err) { | ||
437 | fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", | ||
438 | before->cpu, before->mperf, after->mperf); | ||
439 | } | ||
440 | if (perf_err) { | ||
441 | if (!aperf_mperf_unstable) { | ||
442 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | ||
443 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | ||
444 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | ||
445 | 472 | ||
446 | aperf_mperf_unstable = 1; | 473 | aperf_mperf_unstable = 1; |
447 | } | ||
448 | /* | ||
449 | * mperf delta is likely a huge "positive" number | ||
450 | * can not use it for calculating c0 time | ||
451 | */ | ||
452 | skip_c0 = 1; | ||
453 | skip_c1 = 1; | ||
454 | } | 474 | } |
455 | |||
456 | /* | 475 | /* |
457 | * As mperf and tsc collection are not atomic, | 476 | * mperf delta is likely a huge "positive" number |
458 | * it is possible for mperf's non-halted cycles | 477 | * can not use it for calculating c0 time |
459 | * to exceed TSC's all cycles: show c1 = 0% in that case. | ||
460 | */ | 478 | */ |
461 | if (delta->mperf > delta->tsc) | 479 | skip_c0 = 1; |
462 | delta->c1 = 0; | 480 | skip_c1 = 1; |
463 | else /* normal case, derive c1 */ | 481 | } |
464 | delta->c1 = delta->tsc - delta->mperf | ||
465 | - delta->c3 - delta->c6 - delta->c7; | ||
466 | 482 | ||
467 | if (delta->mperf == 0) | ||
468 | delta->mperf = 1; /* divide by 0 protection */ | ||
469 | 483 | ||
470 | /* | 484 | /* |
471 | * for "extra msr", just copy the latest w/o subtracting | 485 | * As mperf and tsc collection are not atomic, |
472 | */ | 486 | * it is possible for mperf's non-halted cycles |
473 | delta->extra_msr = after->extra_msr; | 487 | * to exceed TSC's all cycles: show c1 = 0% in that case. |
474 | if (errors) { | 488 | */ |
475 | fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); | 489 | if (old->mperf > old->tsc) |
476 | dump_cnt(before); | 490 | old->c1 = 0; |
477 | fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); | 491 | else { |
478 | dump_cnt(after); | 492 | /* normal case, derive c1 */ |
479 | errors = 0; | 493 | old->c1 = old->tsc - old->mperf - core_delta->c3 |
480 | } | 494 | - core_delta->c6 - core_delta->c7; |
495 | } | ||
496 | if (old->mperf == 0) { | ||
497 | if (verbose) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | ||
498 | old->mperf = 1; /* divide by 0 protection */ | ||
481 | } | 499 | } |
500 | |||
501 | /* | ||
502 | * for "extra msr", just copy the latest w/o subtracting | ||
503 | */ | ||
504 | old->extra_msr = new->extra_msr; | ||
505 | } | ||
506 | |||
507 | int delta_cpu(struct thread_data *t, struct core_data *c, | ||
508 | struct pkg_data *p, struct thread_data *t2, | ||
509 | struct core_data *c2, struct pkg_data *p2) | ||
510 | { | ||
511 | /* calculate core delta only for 1st thread in core */ | ||
512 | if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) | ||
513 | delta_core(c, c2); | ||
514 | |||
515 | /* always calculate thread delta */ | ||
516 | delta_thread(t, t2, c2); /* c2 is core delta */ | ||
517 | |||
518 | /* calculate package delta only for 1st core in package */ | ||
519 | if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) | ||
520 | delta_package(p, p2); | ||
521 | |||
482 | return 0; | 522 | return 0; |
483 | } | 523 | } |
484 | 524 | ||
485 | void compute_average(struct counters *delta, struct counters *avg) | 525 | void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
526 | { | ||
527 | t->tsc = 0; | ||
528 | t->aperf = 0; | ||
529 | t->mperf = 0; | ||
530 | t->c1 = 0; | ||
531 | |||
532 | /* tells format_counters to dump all fields from this set */ | ||
533 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
534 | |||
535 | c->c3 = 0; | ||
536 | c->c6 = 0; | ||
537 | c->c7 = 0; | ||
538 | |||
539 | p->pc2 = 0; | ||
540 | p->pc3 = 0; | ||
541 | p->pc6 = 0; | ||
542 | p->pc7 = 0; | ||
543 | } | ||
544 | int sum_counters(struct thread_data *t, struct core_data *c, | ||
545 | struct pkg_data *p) | ||
486 | { | 546 | { |
487 | struct counters *sum; | 547 | average.threads.tsc += t->tsc; |
548 | average.threads.aperf += t->aperf; | ||
549 | average.threads.mperf += t->mperf; | ||
550 | average.threads.c1 += t->c1; | ||
488 | 551 | ||
489 | sum = calloc(1, sizeof(struct counters)); | 552 | /* sum per-core values only for 1st thread in core */ |
490 | if (sum == NULL) { | 553 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
491 | perror("calloc sum"); | 554 | return 0; |
492 | exit(1); | ||
493 | } | ||
494 | 555 | ||
495 | for (; delta; delta = delta->next) { | 556 | average.cores.c3 += c->c3; |
496 | sum->tsc += delta->tsc; | 557 | average.cores.c6 += c->c6; |
497 | sum->c1 += delta->c1; | 558 | average.cores.c7 += c->c7; |
498 | sum->c3 += delta->c3; | 559 | |
499 | sum->c6 += delta->c6; | 560 | /* sum per-pkg values only for 1st core in pkg */ |
500 | sum->c7 += delta->c7; | 561 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
501 | sum->aperf += delta->aperf; | 562 | return 0; |
502 | sum->mperf += delta->mperf; | 563 | |
503 | sum->pc2 += delta->pc2; | 564 | average.packages.pc2 += p->pc2; |
504 | sum->pc3 += delta->pc3; | 565 | average.packages.pc3 += p->pc3; |
505 | sum->pc6 += delta->pc6; | 566 | average.packages.pc6 += p->pc6; |
506 | sum->pc7 += delta->pc7; | 567 | average.packages.pc7 += p->pc7; |
507 | } | 568 | |
508 | avg->tsc = sum->tsc/num_cpus; | 569 | return 0; |
509 | avg->c1 = sum->c1/num_cpus; | 570 | } |
510 | avg->c3 = sum->c3/num_cpus; | 571 | /* |
511 | avg->c6 = sum->c6/num_cpus; | 572 | * sum the counters for all cpus in the system |
512 | avg->c7 = sum->c7/num_cpus; | 573 | * compute the weighted average |
513 | avg->aperf = sum->aperf/num_cpus; | 574 | */ |
514 | avg->mperf = sum->mperf/num_cpus; | 575 | void compute_average(struct thread_data *t, struct core_data *c, |
515 | avg->pc2 = sum->pc2/num_cpus; | 576 | struct pkg_data *p) |
516 | avg->pc3 = sum->pc3/num_cpus; | 577 | { |
517 | avg->pc6 = sum->pc6/num_cpus; | 578 | clear_counters(&average.threads, &average.cores, &average.packages); |
518 | avg->pc7 = sum->pc7/num_cpus; | 579 | |
519 | 580 | for_all_cpus(sum_counters, t, c, p); | |
520 | free(sum); | 581 | |
582 | average.threads.tsc /= topo.num_cpus; | ||
583 | average.threads.aperf /= topo.num_cpus; | ||
584 | average.threads.mperf /= topo.num_cpus; | ||
585 | average.threads.c1 /= topo.num_cpus; | ||
586 | |||
587 | average.cores.c3 /= topo.num_cores; | ||
588 | average.cores.c6 /= topo.num_cores; | ||
589 | average.cores.c7 /= topo.num_cores; | ||
590 | |||
591 | average.packages.pc2 /= topo.num_packages; | ||
592 | average.packages.pc3 /= topo.num_packages; | ||
593 | average.packages.pc6 /= topo.num_packages; | ||
594 | average.packages.pc7 /= topo.num_packages; | ||
521 | } | 595 | } |
522 | 596 | ||
523 | int get_counters(struct counters *cnt) | 597 | static unsigned long long rdtsc(void) |
524 | { | 598 | { |
525 | for ( ; cnt; cnt = cnt->next) { | 599 | unsigned int low, high; |
526 | 600 | ||
527 | if (cpu_migrate(cnt->cpu)) | 601 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); |
528 | return -1; | ||
529 | 602 | ||
530 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) | 603 | return low | ((unsigned long long)high) << 32; |
531 | return -1; | 604 | } |
532 | 605 | ||
533 | if (has_aperf) { | ||
534 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) | ||
535 | return -1; | ||
536 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) | ||
537 | return -1; | ||
538 | } | ||
539 | 606 | ||
540 | if (do_nhm_cstates) { | 607 | /* |
541 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) | 608 | * get_counters(...) |
542 | return -1; | 609 | * migrate to cpu |
543 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | 610 | * acquire and record local counters for that cpu |
544 | return -1; | 611 | */ |
545 | } | 612 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
613 | { | ||
614 | int cpu = t->cpu_id; | ||
546 | 615 | ||
547 | if (do_snb_cstates) | 616 | if (cpu_migrate(cpu)) |
548 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) | 617 | return -1; |
549 | return -1; | ||
550 | 618 | ||
551 | if (do_nhm_cstates) { | 619 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
552 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | 620 | |
553 | return -1; | 621 | if (has_aperf) { |
554 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | 622 | if (get_msr(cpu, MSR_APERF, &t->aperf)) |
555 | return -1; | 623 | return -3; |
556 | } | 624 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) |
557 | if (do_snb_cstates) { | 625 | return -4; |
558 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | 626 | } |
559 | return -1; | 627 | |
560 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | 628 | if (extra_msr_offset) |
561 | return -1; | 629 | if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) |
562 | } | 630 | return -5; |
563 | if (extra_msr_offset) | 631 | |
564 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) | 632 | /* collect core counters only for 1st thread in core */ |
565 | return -1; | 633 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
634 | return 0; | ||
635 | |||
636 | if (do_nhm_cstates) { | ||
637 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | ||
638 | return -6; | ||
639 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | ||
640 | return -7; | ||
641 | } | ||
642 | |||
643 | if (do_snb_cstates) | ||
644 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | ||
645 | return -8; | ||
646 | |||
647 | /* collect package counters only for 1st core in package */ | ||
648 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
649 | return 0; | ||
650 | |||
651 | if (do_nhm_cstates) { | ||
652 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) | ||
653 | return -9; | ||
654 | if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) | ||
655 | return -10; | ||
656 | } | ||
657 | if (do_snb_cstates) { | ||
658 | if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) | ||
659 | return -11; | ||
660 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | ||
661 | return -12; | ||
566 | } | 662 | } |
567 | return 0; | 663 | return 0; |
568 | } | 664 | } |
569 | 665 | ||
570 | void print_nehalem_info(void) | 666 | void print_verbose_header(void) |
571 | { | 667 | { |
572 | unsigned long long msr; | 668 | unsigned long long msr; |
573 | unsigned int ratio; | 669 | unsigned int ratio; |
@@ -615,143 +711,82 @@ void print_nehalem_info(void) | |||
615 | 711 | ||
616 | } | 712 | } |
617 | 713 | ||
618 | void free_counter_list(struct counters *list) | 714 | void free_all_buffers(void) |
619 | { | 715 | { |
620 | struct counters *p; | 716 | CPU_FREE(cpu_present_set); |
717 | cpu_present_set = NULL; | ||
718 | cpu_present_set = 0; | ||
621 | 719 | ||
622 | for (p = list; p; ) { | 720 | CPU_FREE(cpu_affinity_set); |
623 | struct counters *free_me; | 721 | cpu_affinity_set = NULL; |
722 | cpu_affinity_setsize = 0; | ||
624 | 723 | ||
625 | free_me = p; | 724 | free(thread_even); |
626 | p = p->next; | 725 | free(core_even); |
627 | free(free_me); | 726 | free(package_even); |
628 | } | ||
629 | } | ||
630 | 727 | ||
631 | void free_all_counters(void) | 728 | thread_even = NULL; |
632 | { | 729 | core_even = NULL; |
633 | free_counter_list(cnt_even); | 730 | package_even = NULL; |
634 | cnt_even = NULL; | ||
635 | 731 | ||
636 | free_counter_list(cnt_odd); | 732 | free(thread_odd); |
637 | cnt_odd = NULL; | 733 | free(core_odd); |
734 | free(package_odd); | ||
638 | 735 | ||
639 | free_counter_list(cnt_delta); | 736 | thread_odd = NULL; |
640 | cnt_delta = NULL; | 737 | core_odd = NULL; |
738 | package_odd = NULL; | ||
641 | 739 | ||
642 | free_counter_list(cnt_average); | 740 | free(output_buffer); |
643 | cnt_average = NULL; | 741 | output_buffer = NULL; |
742 | outp = NULL; | ||
644 | } | 743 | } |
645 | 744 | ||
646 | void insert_counters(struct counters **list, | 745 | /* |
647 | struct counters *new) | 746 | * cpu_is_first_sibling_in_core(cpu) |
747 | * return 1 if given CPU is 1st HT sibling in the core | ||
748 | */ | ||
749 | int cpu_is_first_sibling_in_core(int cpu) | ||
648 | { | 750 | { |
649 | struct counters *prev; | 751 | char path[64]; |
650 | 752 | FILE *filep; | |
651 | /* | 753 | int first_cpu; |
652 | * list was empty | ||
653 | */ | ||
654 | if (*list == NULL) { | ||
655 | new->next = *list; | ||
656 | *list = new; | ||
657 | return; | ||
658 | } | ||
659 | |||
660 | if (!summary_only) | ||
661 | show_cpu = 1; /* there is more than one CPU */ | ||
662 | |||
663 | /* | ||
664 | * insert on front of list. | ||
665 | * It is sorted by ascending package#, core#, cpu# | ||
666 | */ | ||
667 | if (((*list)->pkg > new->pkg) || | ||
668 | (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || | ||
669 | (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { | ||
670 | new->next = *list; | ||
671 | *list = new; | ||
672 | return; | ||
673 | } | ||
674 | |||
675 | prev = *list; | ||
676 | |||
677 | while (prev->next && (prev->next->pkg < new->pkg)) { | ||
678 | prev = prev->next; | ||
679 | if (!summary_only) | ||
680 | show_pkg = 1; /* there is more than 1 package */ | ||
681 | } | ||
682 | |||
683 | while (prev->next && (prev->next->pkg == new->pkg) | ||
684 | && (prev->next->core < new->core)) { | ||
685 | prev = prev->next; | ||
686 | if (!summary_only) | ||
687 | show_core = 1; /* there is more than 1 core */ | ||
688 | } | ||
689 | 754 | ||
690 | while (prev->next && (prev->next->pkg == new->pkg) | 755 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); |
691 | && (prev->next->core == new->core) | 756 | filep = fopen(path, "r"); |
692 | && (prev->next->cpu < new->cpu)) { | 757 | if (filep == NULL) { |
693 | prev = prev->next; | 758 | perror(path); |
759 | exit(1); | ||
694 | } | 760 | } |
695 | 761 | fscanf(filep, "%d", &first_cpu); | |
696 | /* | 762 | fclose(filep); |
697 | * insert after "prev" | 763 | return (cpu == first_cpu); |
698 | */ | ||
699 | new->next = prev->next; | ||
700 | prev->next = new; | ||
701 | } | 764 | } |
702 | 765 | ||
703 | void alloc_new_counters(int pkg, int core, int cpu) | 766 | /* |
767 | * cpu_is_first_core_in_package(cpu) | ||
768 | * return 1 if given CPU is 1st core in package | ||
769 | */ | ||
770 | int cpu_is_first_core_in_package(int cpu) | ||
704 | { | 771 | { |
705 | struct counters *new; | 772 | char path[64]; |
706 | 773 | FILE *filep; | |
707 | if (verbose > 1) | 774 | int first_cpu; |
708 | printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); | ||
709 | |||
710 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
711 | if (new == NULL) { | ||
712 | perror("calloc"); | ||
713 | exit(1); | ||
714 | } | ||
715 | new->pkg = pkg; | ||
716 | new->core = core; | ||
717 | new->cpu = cpu; | ||
718 | insert_counters(&cnt_odd, new); | ||
719 | |||
720 | new = (struct counters *)calloc(1, | ||
721 | sizeof(struct counters)); | ||
722 | if (new == NULL) { | ||
723 | perror("calloc"); | ||
724 | exit(1); | ||
725 | } | ||
726 | new->pkg = pkg; | ||
727 | new->core = core; | ||
728 | new->cpu = cpu; | ||
729 | insert_counters(&cnt_even, new); | ||
730 | |||
731 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
732 | if (new == NULL) { | ||
733 | perror("calloc"); | ||
734 | exit(1); | ||
735 | } | ||
736 | new->pkg = pkg; | ||
737 | new->core = core; | ||
738 | new->cpu = cpu; | ||
739 | insert_counters(&cnt_delta, new); | ||
740 | 775 | ||
741 | new = (struct counters *)calloc(1, sizeof(struct counters)); | 776 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); |
742 | if (new == NULL) { | 777 | filep = fopen(path, "r"); |
743 | perror("calloc"); | 778 | if (filep == NULL) { |
779 | perror(path); | ||
744 | exit(1); | 780 | exit(1); |
745 | } | 781 | } |
746 | new->pkg = pkg; | 782 | fscanf(filep, "%d", &first_cpu); |
747 | new->core = core; | 783 | fclose(filep); |
748 | new->cpu = cpu; | 784 | return (cpu == first_cpu); |
749 | cnt_average = new; | ||
750 | } | 785 | } |
751 | 786 | ||
752 | int get_physical_package_id(int cpu) | 787 | int get_physical_package_id(int cpu) |
753 | { | 788 | { |
754 | char path[64]; | 789 | char path[80]; |
755 | FILE *filep; | 790 | FILE *filep; |
756 | int pkg; | 791 | int pkg; |
757 | 792 | ||
@@ -768,7 +803,7 @@ int get_physical_package_id(int cpu) | |||
768 | 803 | ||
769 | int get_core_id(int cpu) | 804 | int get_core_id(int cpu) |
770 | { | 805 | { |
771 | char path[64]; | 806 | char path[80]; |
772 | FILE *filep; | 807 | FILE *filep; |
773 | int core; | 808 | int core; |
774 | 809 | ||
@@ -783,14 +818,87 @@ int get_core_id(int cpu) | |||
783 | return core; | 818 | return core; |
784 | } | 819 | } |
785 | 820 | ||
821 | int get_num_ht_siblings(int cpu) | ||
822 | { | ||
823 | char path[80]; | ||
824 | FILE *filep; | ||
825 | int sib1, sib2; | ||
826 | int matches; | ||
827 | char character; | ||
828 | |||
829 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | ||
830 | filep = fopen(path, "r"); | ||
831 | if (filep == NULL) { | ||
832 | perror(path); | ||
833 | exit(1); | ||
834 | } | ||
835 | /* | ||
836 | * file format: | ||
837 | * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) | ||
838 | * otherwinse 1 sibling (self). | ||
839 | */ | ||
840 | matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); | ||
841 | |||
842 | fclose(filep); | ||
843 | |||
844 | if (matches == 3) | ||
845 | return 2; | ||
846 | else | ||
847 | return 1; | ||
848 | } | ||
849 | |||
786 | /* | 850 | /* |
787 | * run func(pkg, core, cpu) on every cpu in /proc/stat | 851 | * run func(thread, core, package) in topology order |
852 | * skip non-present cpus | ||
788 | */ | 853 | */ |
789 | 854 | ||
790 | int for_all_cpus(void (func)(int, int, int)) | 855 | int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, |
856 | struct pkg_data *, struct thread_data *, struct core_data *, | ||
857 | struct pkg_data *), struct thread_data *thread_base, | ||
858 | struct core_data *core_base, struct pkg_data *pkg_base, | ||
859 | struct thread_data *thread_base2, struct core_data *core_base2, | ||
860 | struct pkg_data *pkg_base2) | ||
861 | { | ||
862 | int retval, pkg_no, core_no, thread_no; | ||
863 | |||
864 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | ||
865 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | ||
866 | for (thread_no = 0; thread_no < | ||
867 | topo.num_threads_per_core; ++thread_no) { | ||
868 | struct thread_data *t, *t2; | ||
869 | struct core_data *c, *c2; | ||
870 | struct pkg_data *p, *p2; | ||
871 | |||
872 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | ||
873 | |||
874 | if (cpu_is_not_present(t->cpu_id)) | ||
875 | continue; | ||
876 | |||
877 | t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); | ||
878 | |||
879 | c = GET_CORE(core_base, core_no, pkg_no); | ||
880 | c2 = GET_CORE(core_base2, core_no, pkg_no); | ||
881 | |||
882 | p = GET_PKG(pkg_base, pkg_no); | ||
883 | p2 = GET_PKG(pkg_base2, pkg_no); | ||
884 | |||
885 | retval = func(t, c, p, t2, c2, p2); | ||
886 | if (retval) | ||
887 | return retval; | ||
888 | } | ||
889 | } | ||
890 | } | ||
891 | return 0; | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * run func(cpu) on every cpu in /proc/stat | ||
896 | * return 0, or the first non-zero value returned by func | ||
897 | */ | ||
898 | int for_all_proc_cpus(int (func)(int)) | ||
791 | { | 899 | { |
792 | FILE *fp; | 900 | FILE *fp; |
793 | int cpu_count; | 901 | int cpu_num; |
794 | int retval; | 902 | int retval; |
795 | 903 | ||
796 | fp = fopen(proc_stat, "r"); | 904 | fp = fopen(proc_stat, "r"); |
@@ -805,78 +913,88 @@ int for_all_cpus(void (func)(int, int, int)) | |||
805 | exit(1); | 913 | exit(1); |
806 | } | 914 | } |
807 | 915 | ||
808 | for (cpu_count = 0; ; cpu_count++) { | 916 | while (1) { |
809 | int cpu; | 917 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); |
810 | |||
811 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); | ||
812 | if (retval != 1) | 918 | if (retval != 1) |
813 | break; | 919 | break; |
814 | 920 | ||
815 | func(get_physical_package_id(cpu), get_core_id(cpu), cpu); | 921 | retval = func(cpu_num); |
922 | if (retval) { | ||
923 | fclose(fp); | ||
924 | return(retval); | ||
925 | } | ||
816 | } | 926 | } |
817 | fclose(fp); | 927 | fclose(fp); |
818 | return cpu_count; | 928 | return 0; |
819 | } | 929 | } |
820 | 930 | ||
821 | void re_initialize(void) | 931 | void re_initialize(void) |
822 | { | 932 | { |
823 | free_all_counters(); | 933 | free_all_buffers(); |
824 | num_cpus = for_all_cpus(alloc_new_counters); | 934 | setup_all_buffers(); |
825 | cpu_mask_uninit(); | 935 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); |
826 | cpu_mask_init(num_cpus); | ||
827 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); | ||
828 | } | 936 | } |
829 | 937 | ||
830 | void dummy(int pkg, int core, int cpu) { return; } | 938 | |
831 | /* | 939 | /* |
832 | * check to see if a cpu came on-line | 940 | * count_cpus() |
941 | * remember the last one seen, it will be the max | ||
833 | */ | 942 | */ |
834 | int verify_num_cpus(void) | 943 | int count_cpus(int cpu) |
835 | { | 944 | { |
836 | int new_num_cpus; | 945 | if (topo.max_cpu_num < cpu) |
946 | topo.max_cpu_num = cpu; | ||
837 | 947 | ||
838 | new_num_cpus = for_all_cpus(dummy); | 948 | topo.num_cpus += 1; |
839 | 949 | return 0; | |
840 | if (new_num_cpus != num_cpus) { | 950 | } |
841 | if (verbose) | 951 | int mark_cpu_present(int cpu) |
842 | printf("num_cpus was %d, is now %d\n", | 952 | { |
843 | num_cpus, new_num_cpus); | 953 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); |
844 | return -1; | ||
845 | } | ||
846 | return 0; | 954 | return 0; |
847 | } | 955 | } |
848 | 956 | ||
849 | void turbostat_loop() | 957 | void turbostat_loop() |
850 | { | 958 | { |
959 | int retval; | ||
960 | |||
851 | restart: | 961 | restart: |
852 | get_counters(cnt_even); | 962 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
963 | if (retval) { | ||
964 | re_initialize(); | ||
965 | goto restart; | ||
966 | } | ||
853 | gettimeofday(&tv_even, (struct timezone *)NULL); | 967 | gettimeofday(&tv_even, (struct timezone *)NULL); |
854 | 968 | ||
855 | while (1) { | 969 | while (1) { |
856 | if (verify_num_cpus()) { | 970 | if (for_all_proc_cpus(cpu_is_not_present)) { |
857 | re_initialize(); | 971 | re_initialize(); |
858 | goto restart; | 972 | goto restart; |
859 | } | 973 | } |
860 | sleep(interval_sec); | 974 | sleep(interval_sec); |
861 | if (get_counters(cnt_odd)) { | 975 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
976 | if (retval) { | ||
862 | re_initialize(); | 977 | re_initialize(); |
863 | goto restart; | 978 | goto restart; |
864 | } | 979 | } |
865 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 980 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
866 | compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
867 | timersub(&tv_odd, &tv_even, &tv_delta); | 981 | timersub(&tv_odd, &tv_even, &tv_delta); |
868 | compute_average(cnt_delta, cnt_average); | 982 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
869 | print_counters(cnt_delta); | 983 | compute_average(EVEN_COUNTERS); |
984 | format_all_counters(EVEN_COUNTERS); | ||
985 | flush_stdout(); | ||
870 | sleep(interval_sec); | 986 | sleep(interval_sec); |
871 | if (get_counters(cnt_even)) { | 987 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
988 | if (retval) { | ||
872 | re_initialize(); | 989 | re_initialize(); |
873 | goto restart; | 990 | goto restart; |
874 | } | 991 | } |
875 | gettimeofday(&tv_even, (struct timezone *)NULL); | 992 | gettimeofday(&tv_even, (struct timezone *)NULL); |
876 | compute_delta(cnt_even, cnt_odd, cnt_delta); | ||
877 | timersub(&tv_even, &tv_odd, &tv_delta); | 993 | timersub(&tv_even, &tv_odd, &tv_delta); |
878 | compute_average(cnt_delta, cnt_average); | 994 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
879 | print_counters(cnt_delta); | 995 | compute_average(ODD_COUNTERS); |
996 | format_all_counters(ODD_COUNTERS); | ||
997 | flush_stdout(); | ||
880 | } | 998 | } |
881 | } | 999 | } |
882 | 1000 | ||
@@ -1051,6 +1169,208 @@ int open_dev_cpu_msr(int dummy1) | |||
1051 | return 0; | 1169 | return 0; |
1052 | } | 1170 | } |
1053 | 1171 | ||
1172 | void topology_probe() | ||
1173 | { | ||
1174 | int i; | ||
1175 | int max_core_id = 0; | ||
1176 | int max_package_id = 0; | ||
1177 | int max_siblings = 0; | ||
1178 | struct cpu_topology { | ||
1179 | int core_id; | ||
1180 | int physical_package_id; | ||
1181 | } *cpus; | ||
1182 | |||
1183 | /* Initialize num_cpus, max_cpu_num */ | ||
1184 | topo.num_cpus = 0; | ||
1185 | topo.max_cpu_num = 0; | ||
1186 | for_all_proc_cpus(count_cpus); | ||
1187 | if (!summary_only && topo.num_cpus > 1) | ||
1188 | show_cpu = 1; | ||
1189 | |||
1190 | if (verbose > 1) | ||
1191 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | ||
1192 | |||
1193 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | ||
1194 | if (cpus == NULL) { | ||
1195 | perror("calloc cpus"); | ||
1196 | exit(1); | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Allocate and initialize cpu_present_set | ||
1201 | */ | ||
1202 | cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1203 | if (cpu_present_set == NULL) { | ||
1204 | perror("CPU_ALLOC"); | ||
1205 | exit(3); | ||
1206 | } | ||
1207 | cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1208 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
1209 | for_all_proc_cpus(mark_cpu_present); | ||
1210 | |||
1211 | /* | ||
1212 | * Allocate and initialize cpu_affinity_set | ||
1213 | */ | ||
1214 | cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1215 | if (cpu_affinity_set == NULL) { | ||
1216 | perror("CPU_ALLOC"); | ||
1217 | exit(3); | ||
1218 | } | ||
1219 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1220 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); | ||
1221 | |||
1222 | |||
1223 | /* | ||
1224 | * For online cpus | ||
1225 | * find max_core_id, max_package_id | ||
1226 | */ | ||
1227 | for (i = 0; i <= topo.max_cpu_num; ++i) { | ||
1228 | int siblings; | ||
1229 | |||
1230 | if (cpu_is_not_present(i)) { | ||
1231 | if (verbose > 1) | ||
1232 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | ||
1233 | continue; | ||
1234 | } | ||
1235 | cpus[i].core_id = get_core_id(i); | ||
1236 | if (cpus[i].core_id > max_core_id) | ||
1237 | max_core_id = cpus[i].core_id; | ||
1238 | |||
1239 | cpus[i].physical_package_id = get_physical_package_id(i); | ||
1240 | if (cpus[i].physical_package_id > max_package_id) | ||
1241 | max_package_id = cpus[i].physical_package_id; | ||
1242 | |||
1243 | siblings = get_num_ht_siblings(i); | ||
1244 | if (siblings > max_siblings) | ||
1245 | max_siblings = siblings; | ||
1246 | if (verbose > 1) | ||
1247 | fprintf(stderr, "cpu %d pkg %d core %d\n", | ||
1248 | i, cpus[i].physical_package_id, cpus[i].core_id); | ||
1249 | } | ||
1250 | topo.num_cores_per_pkg = max_core_id + 1; | ||
1251 | if (verbose > 1) | ||
1252 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | ||
1253 | max_core_id, topo.num_cores_per_pkg); | ||
1254 | if (!summary_only && topo.num_cores_per_pkg > 1) | ||
1255 | show_core = 1; | ||
1256 | |||
1257 | topo.num_packages = max_package_id + 1; | ||
1258 | if (verbose > 1) | ||
1259 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | ||
1260 | max_package_id, topo.num_packages); | ||
1261 | if (!summary_only && topo.num_packages > 1) | ||
1262 | show_pkg = 1; | ||
1263 | |||
1264 | topo.num_threads_per_core = max_siblings; | ||
1265 | if (verbose > 1) | ||
1266 | fprintf(stderr, "max_siblings %d\n", max_siblings); | ||
1267 | |||
1268 | free(cpus); | ||
1269 | } | ||
1270 | |||
1271 | void | ||
1272 | allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) | ||
1273 | { | ||
1274 | int i; | ||
1275 | |||
1276 | *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * | ||
1277 | topo.num_packages, sizeof(struct thread_data)); | ||
1278 | if (*t == NULL) | ||
1279 | goto error; | ||
1280 | |||
1281 | for (i = 0; i < topo.num_threads_per_core * | ||
1282 | topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1283 | (*t)[i].cpu_id = -1; | ||
1284 | |||
1285 | *c = calloc(topo.num_cores_per_pkg * topo.num_packages, | ||
1286 | sizeof(struct core_data)); | ||
1287 | if (*c == NULL) | ||
1288 | goto error; | ||
1289 | |||
1290 | for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1291 | (*c)[i].core_id = -1; | ||
1292 | |||
1293 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); | ||
1294 | if (*p == NULL) | ||
1295 | goto error; | ||
1296 | |||
1297 | for (i = 0; i < topo.num_packages; i++) | ||
1298 | (*p)[i].package_id = i; | ||
1299 | |||
1300 | return; | ||
1301 | error: | ||
1302 | perror("calloc counters"); | ||
1303 | exit(1); | ||
1304 | } | ||
1305 | /* | ||
1306 | * init_counter() | ||
1307 | * | ||
1308 | * set cpu_id, core_num, pkg_num | ||
1309 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE | ||
1310 | * | ||
1311 | * increment topo.num_cores when 1st core in pkg seen | ||
1312 | */ | ||
1313 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, | ||
1314 | struct pkg_data *pkg_base, int thread_num, int core_num, | ||
1315 | int pkg_num, int cpu_id) | ||
1316 | { | ||
1317 | struct thread_data *t; | ||
1318 | struct core_data *c; | ||
1319 | struct pkg_data *p; | ||
1320 | |||
1321 | t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); | ||
1322 | c = GET_CORE(core_base, core_num, pkg_num); | ||
1323 | p = GET_PKG(pkg_base, pkg_num); | ||
1324 | |||
1325 | t->cpu_id = cpu_id; | ||
1326 | if (thread_num == 0) { | ||
1327 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; | ||
1328 | if (cpu_is_first_core_in_package(cpu_id)) | ||
1329 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
1330 | } | ||
1331 | |||
1332 | c->core_id = core_num; | ||
1333 | p->package_id = pkg_num; | ||
1334 | } | ||
1335 | |||
1336 | |||
1337 | int initialize_counters(int cpu_id) | ||
1338 | { | ||
1339 | int my_thread_id, my_core_id, my_package_id; | ||
1340 | |||
1341 | my_package_id = get_physical_package_id(cpu_id); | ||
1342 | my_core_id = get_core_id(cpu_id); | ||
1343 | |||
1344 | if (cpu_is_first_sibling_in_core(cpu_id)) { | ||
1345 | my_thread_id = 0; | ||
1346 | topo.num_cores++; | ||
1347 | } else { | ||
1348 | my_thread_id = 1; | ||
1349 | } | ||
1350 | |||
1351 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1352 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1353 | return 0; | ||
1354 | } | ||
1355 | |||
1356 | void allocate_output_buffer() | ||
1357 | { | ||
1358 | output_buffer = calloc(1, (1 + topo.num_cpus) * 128); | ||
1359 | outp = output_buffer; | ||
1360 | if (outp == NULL) { | ||
1361 | perror("calloc"); | ||
1362 | exit(-1); | ||
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | void setup_all_buffers(void) | ||
1367 | { | ||
1368 | topology_probe(); | ||
1369 | allocate_counters(&thread_even, &core_even, &package_even); | ||
1370 | allocate_counters(&thread_odd, &core_odd, &package_odd); | ||
1371 | allocate_output_buffer(); | ||
1372 | for_all_proc_cpus(initialize_counters); | ||
1373 | } | ||
1054 | void turbostat_init() | 1374 | void turbostat_init() |
1055 | { | 1375 | { |
1056 | check_cpuid(); | 1376 | check_cpuid(); |
@@ -1058,21 +1378,19 @@ void turbostat_init() | |||
1058 | check_dev_msr(); | 1378 | check_dev_msr(); |
1059 | check_super_user(); | 1379 | check_super_user(); |
1060 | 1380 | ||
1061 | num_cpus = for_all_cpus(alloc_new_counters); | 1381 | setup_all_buffers(); |
1062 | cpu_mask_init(num_cpus); | ||
1063 | 1382 | ||
1064 | if (verbose) | 1383 | if (verbose) |
1065 | print_nehalem_info(); | 1384 | print_verbose_header(); |
1066 | } | 1385 | } |
1067 | 1386 | ||
1068 | int fork_it(char **argv) | 1387 | int fork_it(char **argv) |
1069 | { | 1388 | { |
1070 | int retval; | ||
1071 | pid_t child_pid; | 1389 | pid_t child_pid; |
1072 | get_counters(cnt_even); | ||
1073 | 1390 | ||
1074 | /* clear affinity side-effect of get_counters() */ | 1391 | for_all_cpus(get_counters, EVEN_COUNTERS); |
1075 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | 1392 | /* clear affinity side-effect of get_counters() */ |
1393 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | ||
1076 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1394 | gettimeofday(&tv_even, (struct timezone *)NULL); |
1077 | 1395 | ||
1078 | child_pid = fork(); | 1396 | child_pid = fork(); |
@@ -1095,14 +1413,17 @@ int fork_it(char **argv) | |||
1095 | exit(1); | 1413 | exit(1); |
1096 | } | 1414 | } |
1097 | } | 1415 | } |
1098 | get_counters(cnt_odd); | 1416 | /* |
1417 | * n.b. fork_it() does not check for errors from for_all_cpus() | ||
1418 | * because re-starting is problematic when forking | ||
1419 | */ | ||
1420 | for_all_cpus(get_counters, ODD_COUNTERS); | ||
1099 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 1421 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
1100 | retval = compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
1101 | |||
1102 | timersub(&tv_odd, &tv_even, &tv_delta); | 1422 | timersub(&tv_odd, &tv_even, &tv_delta); |
1103 | compute_average(cnt_delta, cnt_average); | 1423 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
1104 | if (!retval) | 1424 | compute_average(EVEN_COUNTERS); |
1105 | print_counters(cnt_delta); | 1425 | format_all_counters(EVEN_COUNTERS); |
1426 | flush_stderr(); | ||
1106 | 1427 | ||
1107 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 1428 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
1108 | 1429 | ||
@@ -1115,8 +1436,14 @@ void cmdline(int argc, char **argv) | |||
1115 | 1436 | ||
1116 | progname = argv[0]; | 1437 | progname = argv[0]; |
1117 | 1438 | ||
1118 | while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { | 1439 | while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { |
1119 | switch (opt) { | 1440 | switch (opt) { |
1441 | case 'c': | ||
1442 | show_core_only++; | ||
1443 | break; | ||
1444 | case 'p': | ||
1445 | show_pkg_only++; | ||
1446 | break; | ||
1120 | case 's': | 1447 | case 's': |
1121 | summary_only++; | 1448 | summary_only++; |
1122 | break; | 1449 | break; |
@@ -1142,10 +1469,8 @@ int main(int argc, char **argv) | |||
1142 | cmdline(argc, argv); | 1469 | cmdline(argc, argv); |
1143 | 1470 | ||
1144 | if (verbose > 1) | 1471 | if (verbose > 1) |
1145 | fprintf(stderr, "turbostat Dec 6, 2010" | 1472 | fprintf(stderr, "turbostat v2.0 May 16, 2012" |
1146 | " - Len Brown <lenb@kernel.org>\n"); | 1473 | " - Len Brown <lenb@kernel.org>\n"); |
1147 | if (verbose > 1) | ||
1148 | fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); | ||
1149 | 1474 | ||
1150 | turbostat_init(); | 1475 | turbostat_init(); |
1151 | 1476 | ||