diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 1 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 77 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1329 |
3 files changed, 868 insertions, 539 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index fd8e1f1297aa..f85649554191 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | turbostat : turbostat.c | 1 | turbostat : turbostat.c |
2 | CFLAGS += -Wall | ||
2 | 3 | ||
3 | clean : | 4 | clean : |
4 | rm -f turbostat | 5 | rm -f turbostat |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index adf175f61496..74e44507dfe9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -27,7 +27,11 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. | |||
27 | on processors that additionally support C-state residency counters. | 27 | on processors that additionally support C-state residency counters. |
28 | 28 | ||
29 | .SS Options | 29 | .SS Options |
30 | The \fB-s\fP option prints only a 1-line summary for each sample interval. | 30 | The \fB-s\fP option limits output to a 1-line system summary for each interval. |
31 | .PP | ||
32 | The \fB-c\fP option limits output to the 1st thread in each core. | ||
33 | .PP | ||
34 | The \fB-p\fP option limits output to the 1st thread in each package. | ||
31 | .PP | 35 | .PP |
32 | The \fB-v\fP option increases verbosity. | 36 | The \fB-v\fP option increases verbosity. |
33 | .PP | 37 | .PP |
@@ -65,19 +69,19 @@ Subsequent rows show per-CPU statistics. | |||
65 | .nf | 69 | .nf |
66 | [root@x980]# ./turbostat | 70 | [root@x980]# ./turbostat |
67 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 71 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
68 | 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 | 72 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 |
69 | 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 | 73 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 |
70 | 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 | 74 | 0 6 0.05 1.62 3.38 10.34 |
71 | 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 | 75 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 |
72 | 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 | 76 | 1 8 0.03 1.62 3.38 0.06 |
73 | 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 | 77 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 |
74 | 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 | 78 | 2 10 0.02 1.62 3.38 0.29 |
75 | 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 | 79 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 |
76 | 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 | 80 | 8 7 0.01 1.62 3.38 0.06 |
77 | 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 | 81 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 |
78 | 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 | 82 | 9 9 0.02 1.62 3.38 0.60 |
79 | 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 | 83 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 |
80 | 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 | 84 | 10 11 0.02 1.62 3.38 0.02 |
81 | .fi | 85 | .fi |
82 | .SH SUMMARY EXAMPLE | 86 | .SH SUMMARY EXAMPLE |
83 | The "-s" option prints the column headers just once, | 87 | The "-s" option prints the column headers just once, |
@@ -86,9 +90,10 @@ and then the one line system summary for each sample interval. | |||
86 | .nf | 90 | .nf |
87 | [root@x980]# ./turbostat -s | 91 | [root@x980]# ./turbostat -s |
88 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 92 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
89 | 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 | 93 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 |
90 | 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 | 94 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 |
91 | 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 | 95 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 |
96 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | ||
92 | .fi | 97 | .fi |
93 | .SH VERBOSE EXAMPLE | 98 | .SH VERBOSE EXAMPLE |
94 | The "-v" option adds verbosity to the output: | 99 | The "-v" option adds verbosity to the output: |
@@ -120,30 +125,28 @@ until ^C while the other CPUs are mostly idle: | |||
120 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null | 125 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null |
121 | ^C | 126 | ^C |
122 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 127 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
123 | 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 | 128 | 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00 |
124 | 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 | 129 | 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00 |
125 | 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 | 130 | 0 6 0.21 3.06 3.38 18.09 |
126 | 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 | 131 | 1 2 0.53 3.33 3.38 2.80 46.40 50.27 |
127 | 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 | 132 | 1 8 0.89 3.47 3.38 2.44 |
128 | 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 | 133 | 2 4 1.36 3.43 3.38 9.04 23.71 65.89 |
129 | 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 | 134 | 2 10 0.18 2.86 3.38 10.22 |
130 | 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 | 135 | 8 1 0.04 2.87 3.38 99.96 0.01 0.00 |
131 | 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 | 136 | 8 7 99.72 3.63 3.38 0.27 |
132 | 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 | 137 | 9 3 0.31 3.21 3.38 7.64 56.55 35.50 |
133 | 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 | 138 | 9 9 0.08 2.95 3.38 7.88 |
134 | 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 | 139 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
135 | 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 | 140 | 10 11 0.16 2.88 3.38 3.40 |
136 | 4.907015 sec | ||
137 | |||
138 | .fi | 141 | .fi |
139 | Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit | 142 | Above, the cycle soaker drives cpu7 up to its 3.6 GHz turbo limit |
140 | while the other processors are generally in various states of idle. | 143 | while the other processors are generally in various states of idle. |
141 | 144 | ||
142 | Note that cpu0 is an HT sibling sharing core0 | 145 | Note that cpu1 and cpu7 are HT siblings within core8. |
143 | with cpu6, and thus it is unable to get to an idle state | 146 | As cpu7 is very busy, it prevents its sibling, cpu1, |
144 | deeper than c1 while cpu6 is busy. | 147 | from entering a c-state deeper than c1. |
145 | 148 | ||
146 | Note that turbostat reports average GHz of 3.64, while | 149 | Note that turbostat reports average GHz of 3.63, while |
147 | the arithmetic average of the GHz column above is lower. | 150 | the arithmetic average of the GHz column above is lower. |
148 | This is a weighted average, where the weight is %c0, i.e. it is the total number of | 151 | This is a weighted average, where the weight is %c0, i.e. it is the total number of |
149 | un-halted cycles elapsed per time divided by the number of CPUs. | 152 | un-halted cycles elapsed per time divided by the number of CPUs. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 16de7ad4850f..b815a12159b2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -67,92 +67,119 @@ double bclk; | |||
67 | unsigned int show_pkg; | 67 | unsigned int show_pkg; |
68 | unsigned int show_core; | 68 | unsigned int show_core; |
69 | unsigned int show_cpu; | 69 | unsigned int show_cpu; |
70 | unsigned int show_pkg_only; | ||
71 | unsigned int show_core_only; | ||
72 | char *output_buffer, *outp; | ||
70 | 73 | ||
71 | int aperf_mperf_unstable; | 74 | int aperf_mperf_unstable; |
72 | int backwards_count; | 75 | int backwards_count; |
73 | char *progname; | 76 | char *progname; |
74 | 77 | ||
75 | int num_cpus; | 78 | cpu_set_t *cpu_present_set, *cpu_affinity_set; |
76 | cpu_set_t *cpu_present_set, *cpu_mask; | 79 | size_t cpu_present_setsize, cpu_affinity_setsize; |
77 | size_t cpu_present_setsize, cpu_mask_size; | 80 | |
78 | 81 | struct thread_data { | |
79 | struct counters { | 82 | unsigned long long tsc; |
80 | unsigned long long tsc; /* per thread */ | 83 | unsigned long long aperf; |
81 | unsigned long long aperf; /* per thread */ | 84 | unsigned long long mperf; |
82 | unsigned long long mperf; /* per thread */ | 85 | unsigned long long c1; /* derived */ |
83 | unsigned long long c1; /* per thread (calculated) */ | 86 | unsigned long long extra_msr; |
84 | unsigned long long c3; /* per core */ | 87 | unsigned int cpu_id; |
85 | unsigned long long c6; /* per core */ | 88 | unsigned int flags; |
86 | unsigned long long c7; /* per core */ | 89 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 |
87 | unsigned long long pc2; /* per package */ | 90 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 |
88 | unsigned long long pc3; /* per package */ | 91 | } *thread_even, *thread_odd; |
89 | unsigned long long pc6; /* per package */ | 92 | |
90 | unsigned long long pc7; /* per package */ | 93 | struct core_data { |
91 | unsigned long long extra_msr; /* per thread */ | 94 | unsigned long long c3; |
92 | int pkg; | 95 | unsigned long long c6; |
93 | int core; | 96 | unsigned long long c7; |
94 | int cpu; | 97 | unsigned int core_id; |
95 | struct counters *next; | 98 | } *core_even, *core_odd; |
96 | }; | 99 | |
97 | 100 | struct pkg_data { | |
98 | struct counters *cnt_even; | 101 | unsigned long long pc2; |
99 | struct counters *cnt_odd; | 102 | unsigned long long pc3; |
100 | struct counters *cnt_delta; | 103 | unsigned long long pc6; |
101 | struct counters *cnt_average; | 104 | unsigned long long pc7; |
102 | struct timeval tv_even; | 105 | unsigned int package_id; |
103 | struct timeval tv_odd; | 106 | } *package_even, *package_odd; |
104 | struct timeval tv_delta; | 107 | |
105 | 108 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | |
106 | int mark_cpu_present(int pkg, int core, int cpu) | 109 | #define EVEN_COUNTERS thread_even, core_even, package_even |
110 | |||
111 | #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ | ||
112 | (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ | ||
113 | topo.num_threads_per_core + \ | ||
114 | (core_no) * topo.num_threads_per_core + (thread_no)) | ||
115 | #define GET_CORE(core_base, core_no, pkg_no) \ | ||
116 | (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) | ||
117 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) | ||
118 | |||
119 | struct system_summary { | ||
120 | struct thread_data threads; | ||
121 | struct core_data cores; | ||
122 | struct pkg_data packages; | ||
123 | } sum, average; | ||
124 | |||
125 | |||
126 | struct topo_params { | ||
127 | int num_packages; | ||
128 | int num_cpus; | ||
129 | int num_cores; | ||
130 | int max_cpu_num; | ||
131 | int num_cores_per_pkg; | ||
132 | int num_threads_per_core; | ||
133 | } topo; | ||
134 | |||
135 | struct timeval tv_even, tv_odd, tv_delta; | ||
136 | |||
137 | void setup_all_buffers(void); | ||
138 | |||
139 | int cpu_is_not_present(int cpu) | ||
107 | { | 140 | { |
108 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); | 141 | return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); |
109 | return 0; | ||
110 | } | 142 | } |
111 | |||
112 | /* | 143 | /* |
113 | * cpu_mask_init(ncpus) | 144 | * run func(thread, core, package) in topology order |
114 | * | 145 | * skip non-present cpus |
115 | * allocate and clear cpu_mask | ||
116 | * set cpu_mask_size | ||
117 | */ | 146 | */ |
118 | void cpu_mask_init(int ncpus) | 147 | |
148 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), | ||
149 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) | ||
119 | { | 150 | { |
120 | cpu_mask = CPU_ALLOC(ncpus); | 151 | int retval, pkg_no, core_no, thread_no; |
121 | if (cpu_mask == NULL) { | ||
122 | perror("CPU_ALLOC"); | ||
123 | exit(3); | ||
124 | } | ||
125 | cpu_mask_size = CPU_ALLOC_SIZE(ncpus); | ||
126 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
127 | 152 | ||
128 | /* | 153 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
129 | * Allocate and initialize cpu_present_set | 154 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { |
130 | */ | 155 | for (thread_no = 0; thread_no < |
131 | cpu_present_set = CPU_ALLOC(ncpus); | 156 | topo.num_threads_per_core; ++thread_no) { |
132 | if (cpu_present_set == NULL) { | 157 | struct thread_data *t; |
133 | perror("CPU_ALLOC"); | 158 | struct core_data *c; |
134 | exit(3); | 159 | struct pkg_data *p; |
135 | } | ||
136 | cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); | ||
137 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
138 | for_all_cpus(mark_cpu_present); | ||
139 | } | ||
140 | 160 | ||
141 | void cpu_mask_uninit() | 161 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); |
142 | { | 162 | |
143 | CPU_FREE(cpu_mask); | 163 | if (cpu_is_not_present(t->cpu_id)) |
144 | cpu_mask = NULL; | 164 | continue; |
145 | cpu_mask_size = 0; | 165 | |
146 | CPU_FREE(cpu_present_set); | 166 | c = GET_CORE(core_base, core_no, pkg_no); |
147 | cpu_present_set = NULL; | 167 | p = GET_PKG(pkg_base, pkg_no); |
148 | cpu_present_setsize = 0; | 168 | |
169 | retval = func(t, c, p); | ||
170 | if (retval) | ||
171 | return retval; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | return 0; | ||
149 | } | 176 | } |
150 | 177 | ||
151 | int cpu_migrate(int cpu) | 178 | int cpu_migrate(int cpu) |
152 | { | 179 | { |
153 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | 180 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); |
154 | CPU_SET_S(cpu, cpu_mask_size, cpu_mask); | 181 | CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); |
155 | if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) | 182 | if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) |
156 | return -1; | 183 | return -1; |
157 | else | 184 | else |
158 | return 0; | 185 | return 0; |
@@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
181 | void print_header(void) | 208 | void print_header(void) |
182 | { | 209 | { |
183 | if (show_pkg) | 210 | if (show_pkg) |
184 | fprintf(stderr, "pk"); | 211 | outp += sprintf(outp, "pk"); |
185 | if (show_pkg) | 212 | if (show_pkg) |
186 | fprintf(stderr, " "); | 213 | outp += sprintf(outp, " "); |
187 | if (show_core) | 214 | if (show_core) |
188 | fprintf(stderr, "cor"); | 215 | outp += sprintf(outp, "cor"); |
189 | if (show_cpu) | 216 | if (show_cpu) |
190 | fprintf(stderr, " CPU"); | 217 | outp += sprintf(outp, " CPU"); |
191 | if (show_pkg || show_core || show_cpu) | 218 | if (show_pkg || show_core || show_cpu) |
192 | fprintf(stderr, " "); | 219 | outp += sprintf(outp, " "); |
193 | if (do_nhm_cstates) | 220 | if (do_nhm_cstates) |
194 | fprintf(stderr, " %%c0"); | 221 | outp += sprintf(outp, " %%c0"); |
195 | if (has_aperf) | 222 | if (has_aperf) |
196 | fprintf(stderr, " GHz"); | 223 | outp += sprintf(outp, " GHz"); |
197 | fprintf(stderr, " TSC"); | 224 | outp += sprintf(outp, " TSC"); |
198 | if (do_nhm_cstates) | 225 | if (do_nhm_cstates) |
199 | fprintf(stderr, " %%c1"); | 226 | outp += sprintf(outp, " %%c1"); |
200 | if (do_nhm_cstates) | 227 | if (do_nhm_cstates) |
201 | fprintf(stderr, " %%c3"); | 228 | outp += sprintf(outp, " %%c3"); |
202 | if (do_nhm_cstates) | 229 | if (do_nhm_cstates) |
203 | fprintf(stderr, " %%c6"); | 230 | outp += sprintf(outp, " %%c6"); |
204 | if (do_snb_cstates) | 231 | if (do_snb_cstates) |
205 | fprintf(stderr, " %%c7"); | 232 | outp += sprintf(outp, " %%c7"); |
206 | if (do_snb_cstates) | 233 | if (do_snb_cstates) |
207 | fprintf(stderr, " %%pc2"); | 234 | outp += sprintf(outp, " %%pc2"); |
208 | if (do_nhm_cstates) | 235 | if (do_nhm_cstates) |
209 | fprintf(stderr, " %%pc3"); | 236 | outp += sprintf(outp, " %%pc3"); |
210 | if (do_nhm_cstates) | 237 | if (do_nhm_cstates) |
211 | fprintf(stderr, " %%pc6"); | 238 | outp += sprintf(outp, " %%pc6"); |
212 | if (do_snb_cstates) | 239 | if (do_snb_cstates) |
213 | fprintf(stderr, " %%pc7"); | 240 | outp += sprintf(outp, " %%pc7"); |
214 | if (extra_msr_offset) | 241 | if (extra_msr_offset) |
215 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); | 242 | outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); |
216 | 243 | ||
217 | putc('\n', stderr); | 244 | outp += sprintf(outp, "\n"); |
218 | } | 245 | } |
219 | 246 | ||
220 | void dump_cnt(struct counters *cnt) | 247 | int dump_counters(struct thread_data *t, struct core_data *c, |
248 | struct pkg_data *p) | ||
221 | { | 249 | { |
222 | if (!cnt) | 250 | fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); |
223 | return; | 251 | |
224 | if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); | 252 | if (t) { |
225 | if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); | 253 | fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); |
226 | if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); | 254 | fprintf(stderr, "TSC: %016llX\n", t->tsc); |
227 | if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); | 255 | fprintf(stderr, "aperf: %016llX\n", t->aperf); |
228 | if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); | 256 | fprintf(stderr, "mperf: %016llX\n", t->mperf); |
229 | if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); | 257 | fprintf(stderr, "c1: %016llX\n", t->c1); |
230 | if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); | 258 | fprintf(stderr, "msr0x%x: %016llX\n", |
231 | if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); | 259 | extra_msr_offset, t->extra_msr); |
232 | if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); | 260 | } |
233 | if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3); | ||
234 | if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6); | ||
235 | if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7); | ||
236 | if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); | ||
237 | } | ||
238 | 261 | ||
239 | void dump_list(struct counters *cnt) | 262 | if (c) { |
240 | { | 263 | fprintf(stderr, "core: %d\n", c->core_id); |
241 | printf("dump_list 0x%p\n", cnt); | 264 | fprintf(stderr, "c3: %016llX\n", c->c3); |
265 | fprintf(stderr, "c6: %016llX\n", c->c6); | ||
266 | fprintf(stderr, "c7: %016llX\n", c->c7); | ||
267 | } | ||
242 | 268 | ||
243 | for (; cnt; cnt = cnt->next) | 269 | if (p) { |
244 | dump_cnt(cnt); | 270 | fprintf(stderr, "package: %d\n", p->package_id); |
271 | fprintf(stderr, "pc2: %016llX\n", p->pc2); | ||
272 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | ||
273 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | ||
274 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | ||
275 | } | ||
276 | return 0; | ||
245 | } | 277 | } |
246 | 278 | ||
247 | /* | 279 | /* |
@@ -253,321 +285,385 @@ void dump_list(struct counters *cnt) | |||
253 | * TSC: "TSC" 3 columns %3.2 | 285 | * TSC: "TSC" 3 columns %3.2 |
254 | * percentage " %pc3" %6.2 | 286 | * percentage " %pc3" %6.2 |
255 | */ | 287 | */ |
256 | void print_cnt(struct counters *p) | 288 | int format_counters(struct thread_data *t, struct core_data *c, |
289 | struct pkg_data *p) | ||
257 | { | 290 | { |
258 | double interval_float; | 291 | double interval_float; |
259 | 292 | ||
293 | /* if showing only 1st thread in core and this isn't one, bail out */ | ||
294 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
295 | return 0; | ||
296 | |||
297 | /* if showing only 1st thread in pkg and this isn't one, bail out */ | ||
298 | if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
299 | return 0; | ||
300 | |||
260 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; | 301 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; |
261 | 302 | ||
262 | /* topology columns, print blanks on 1st (average) line */ | 303 | /* topo columns, print blanks on 1st (average) line */ |
263 | if (p == cnt_average) { | 304 | if (t == &average.threads) { |
264 | if (show_pkg) | 305 | if (show_pkg) |
265 | fprintf(stderr, " "); | 306 | outp += sprintf(outp, " "); |
266 | if (show_pkg && show_core) | 307 | if (show_pkg && show_core) |
267 | fprintf(stderr, " "); | 308 | outp += sprintf(outp, " "); |
268 | if (show_core) | 309 | if (show_core) |
269 | fprintf(stderr, " "); | 310 | outp += sprintf(outp, " "); |
270 | if (show_cpu) | 311 | if (show_cpu) |
271 | fprintf(stderr, " " " "); | 312 | outp += sprintf(outp, " " " "); |
272 | } else { | 313 | } else { |
273 | if (show_pkg) | 314 | if (show_pkg) { |
274 | fprintf(stderr, "%2d", p->pkg); | 315 | if (p) |
316 | outp += sprintf(outp, "%2d", p->package_id); | ||
317 | else | ||
318 | outp += sprintf(outp, " "); | ||
319 | } | ||
275 | if (show_pkg && show_core) | 320 | if (show_pkg && show_core) |
276 | fprintf(stderr, " "); | 321 | outp += sprintf(outp, " "); |
277 | if (show_core) | 322 | if (show_core) { |
278 | fprintf(stderr, "%3d", p->core); | 323 | if (c) |
324 | outp += sprintf(outp, "%3d", c->core_id); | ||
325 | else | ||
326 | outp += sprintf(outp, " "); | ||
327 | } | ||
279 | if (show_cpu) | 328 | if (show_cpu) |
280 | fprintf(stderr, " %3d", p->cpu); | 329 | outp += sprintf(outp, " %3d", t->cpu_id); |
281 | } | 330 | } |
282 | 331 | ||
283 | /* %c0 */ | 332 | /* %c0 */ |
284 | if (do_nhm_cstates) { | 333 | if (do_nhm_cstates) { |
285 | if (show_pkg || show_core || show_cpu) | 334 | if (show_pkg || show_core || show_cpu) |
286 | fprintf(stderr, " "); | 335 | outp += sprintf(outp, " "); |
287 | if (!skip_c0) | 336 | if (!skip_c0) |
288 | fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); | 337 | outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); |
289 | else | 338 | else |
290 | fprintf(stderr, " ****"); | 339 | outp += sprintf(outp, " ****"); |
291 | } | 340 | } |
292 | 341 | ||
293 | /* GHz */ | 342 | /* GHz */ |
294 | if (has_aperf) { | 343 | if (has_aperf) { |
295 | if (!aperf_mperf_unstable) { | 344 | if (!aperf_mperf_unstable) { |
296 | fprintf(stderr, " %3.2f", | 345 | outp += sprintf(outp, " %3.2f", |
297 | 1.0 * p->tsc / units * p->aperf / | 346 | 1.0 * t->tsc / units * t->aperf / |
298 | p->mperf / interval_float); | 347 | t->mperf / interval_float); |
299 | } else { | 348 | } else { |
300 | if (p->aperf > p->tsc || p->mperf > p->tsc) { | 349 | if (t->aperf > t->tsc || t->mperf > t->tsc) { |
301 | fprintf(stderr, " ***"); | 350 | outp += sprintf(outp, " ***"); |
302 | } else { | 351 | } else { |
303 | fprintf(stderr, "%3.1f*", | 352 | outp += sprintf(outp, "%3.1f*", |
304 | 1.0 * p->tsc / | 353 | 1.0 * t->tsc / |
305 | units * p->aperf / | 354 | units * t->aperf / |
306 | p->mperf / interval_float); | 355 | t->mperf / interval_float); |
307 | } | 356 | } |
308 | } | 357 | } |
309 | } | 358 | } |
310 | 359 | ||
311 | /* TSC */ | 360 | /* TSC */ |
312 | fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); | 361 | outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); |
313 | 362 | ||
314 | if (do_nhm_cstates) { | 363 | if (do_nhm_cstates) { |
315 | if (!skip_c1) | 364 | if (!skip_c1) |
316 | fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); | 365 | outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); |
317 | else | 366 | else |
318 | fprintf(stderr, " ****"); | 367 | outp += sprintf(outp, " ****"); |
319 | } | 368 | } |
369 | |||
370 | /* print per-core data only for 1st thread in core */ | ||
371 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
372 | goto done; | ||
373 | |||
320 | if (do_nhm_cstates) | 374 | if (do_nhm_cstates) |
321 | fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); | 375 | outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); |
322 | if (do_nhm_cstates) | 376 | if (do_nhm_cstates) |
323 | fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); | 377 | outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); |
324 | if (do_snb_cstates) | 378 | if (do_snb_cstates) |
325 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); | 379 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
380 | |||
381 | /* print per-package data only for 1st core in package */ | ||
382 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
383 | goto done; | ||
384 | |||
326 | if (do_snb_cstates) | 385 | if (do_snb_cstates) |
327 | fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); | 386 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
328 | if (do_nhm_cstates) | 387 | if (do_nhm_cstates) |
329 | fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); | 388 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); |
330 | if (do_nhm_cstates) | 389 | if (do_nhm_cstates) |
331 | fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); | 390 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
332 | if (do_snb_cstates) | 391 | if (do_snb_cstates) |
333 | fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); | 392 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
393 | done: | ||
334 | if (extra_msr_offset) | 394 | if (extra_msr_offset) |
335 | fprintf(stderr, " 0x%016llx", p->extra_msr); | 395 | outp += sprintf(outp, " 0x%016llx", t->extra_msr); |
336 | putc('\n', stderr); | 396 | outp += sprintf(outp, "\n"); |
397 | |||
398 | return 0; | ||
337 | } | 399 | } |
338 | 400 | ||
339 | void print_counters(struct counters *counters) | 401 | void flush_stdout() |
402 | { | ||
403 | fputs(output_buffer, stdout); | ||
404 | outp = output_buffer; | ||
405 | } | ||
406 | void flush_stderr() | ||
407 | { | ||
408 | fputs(output_buffer, stderr); | ||
409 | outp = output_buffer; | ||
410 | } | ||
411 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
340 | { | 412 | { |
341 | struct counters *cnt; | ||
342 | static int printed; | 413 | static int printed; |
343 | 414 | ||
344 | |||
345 | if (!printed || !summary_only) | 415 | if (!printed || !summary_only) |
346 | print_header(); | 416 | print_header(); |
347 | 417 | ||
348 | if (num_cpus > 1) | 418 | if (topo.num_cpus > 1) |
349 | print_cnt(cnt_average); | 419 | format_counters(&average.threads, &average.cores, |
420 | &average.packages); | ||
350 | 421 | ||
351 | printed = 1; | 422 | printed = 1; |
352 | 423 | ||
353 | if (summary_only) | 424 | if (summary_only) |
354 | return; | 425 | return; |
355 | 426 | ||
356 | for (cnt = counters; cnt != NULL; cnt = cnt->next) | 427 | for_all_cpus(format_counters, t, c, p); |
357 | print_cnt(cnt); | ||
358 | |||
359 | } | 428 | } |
360 | 429 | ||
361 | #define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) | 430 | void |
431 | delta_package(struct pkg_data *new, struct pkg_data *old) | ||
432 | { | ||
433 | old->pc2 = new->pc2 - old->pc2; | ||
434 | old->pc3 = new->pc3 - old->pc3; | ||
435 | old->pc6 = new->pc6 - old->pc6; | ||
436 | old->pc7 = new->pc7 - old->pc7; | ||
437 | } | ||
362 | 438 | ||
363 | int compute_delta(struct counters *after, | 439 | void |
364 | struct counters *before, struct counters *delta) | 440 | delta_core(struct core_data *new, struct core_data *old) |
365 | { | 441 | { |
366 | int errors = 0; | 442 | old->c3 = new->c3 - old->c3; |
367 | int perf_err = 0; | 443 | old->c6 = new->c6 - old->c6; |
444 | old->c7 = new->c7 - old->c7; | ||
445 | } | ||
368 | 446 | ||
369 | skip_c0 = skip_c1 = 0; | 447 | void |
448 | delta_thread(struct thread_data *new, struct thread_data *old, | ||
449 | struct core_data *core_delta) | ||
450 | { | ||
451 | old->tsc = new->tsc - old->tsc; | ||
452 | |||
453 | /* check for TSC < 1 Mcycles over interval */ | ||
454 | if (old->tsc < (1000 * 1000)) { | ||
455 | fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); | ||
456 | fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); | ||
457 | fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); | ||
458 | exit(-3); | ||
459 | } | ||
370 | 460 | ||
371 | for ( ; after && before && delta; | 461 | old->c1 = new->c1 - old->c1; |
372 | after = after->next, before = before->next, delta = delta->next) { | ||
373 | if (before->cpu != after->cpu) { | ||
374 | printf("cpu configuration changed: %d != %d\n", | ||
375 | before->cpu, after->cpu); | ||
376 | return -1; | ||
377 | } | ||
378 | 462 | ||
379 | if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { | 463 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
380 | fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", | 464 | old->aperf = new->aperf - old->aperf; |
381 | before->cpu, before->tsc, after->tsc); | 465 | old->mperf = new->mperf - old->mperf; |
382 | errors++; | 466 | } else { |
383 | } | ||
384 | /* check for TSC < 1 Mcycles over interval */ | ||
385 | if (delta->tsc < (1000 * 1000)) { | ||
386 | fprintf(stderr, "Insanely slow TSC rate," | ||
387 | " TSC stops in idle?\n"); | ||
388 | fprintf(stderr, "You can disable all c-states" | ||
389 | " by booting with \"idle=poll\"\n"); | ||
390 | fprintf(stderr, "or just the deep ones with" | ||
391 | " \"processor.max_cstate=1\"\n"); | ||
392 | exit(-3); | ||
393 | } | ||
394 | if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { | ||
395 | fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", | ||
396 | before->cpu, before->c3, after->c3); | ||
397 | errors++; | ||
398 | } | ||
399 | if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { | ||
400 | fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", | ||
401 | before->cpu, before->c6, after->c6); | ||
402 | errors++; | ||
403 | } | ||
404 | if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { | ||
405 | fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", | ||
406 | before->cpu, before->c7, after->c7); | ||
407 | errors++; | ||
408 | } | ||
409 | if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { | ||
410 | fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", | ||
411 | before->cpu, before->pc2, after->pc2); | ||
412 | errors++; | ||
413 | } | ||
414 | if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { | ||
415 | fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", | ||
416 | before->cpu, before->pc3, after->pc3); | ||
417 | errors++; | ||
418 | } | ||
419 | if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { | ||
420 | fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", | ||
421 | before->cpu, before->pc6, after->pc6); | ||
422 | errors++; | ||
423 | } | ||
424 | if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { | ||
425 | fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", | ||
426 | before->cpu, before->pc7, after->pc7); | ||
427 | errors++; | ||
428 | } | ||
429 | 467 | ||
430 | perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); | 468 | if (!aperf_mperf_unstable) { |
431 | if (perf_err) { | 469 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
432 | fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", | 470 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
433 | before->cpu, before->aperf, after->aperf); | 471 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
434 | } | ||
435 | perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); | ||
436 | if (perf_err) { | ||
437 | fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", | ||
438 | before->cpu, before->mperf, after->mperf); | ||
439 | } | ||
440 | if (perf_err) { | ||
441 | if (!aperf_mperf_unstable) { | ||
442 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | ||
443 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | ||
444 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | ||
445 | 472 | ||
446 | aperf_mperf_unstable = 1; | 473 | aperf_mperf_unstable = 1; |
447 | } | ||
448 | /* | ||
449 | * mperf delta is likely a huge "positive" number | ||
450 | * can not use it for calculating c0 time | ||
451 | */ | ||
452 | skip_c0 = 1; | ||
453 | skip_c1 = 1; | ||
454 | } | 474 | } |
455 | |||
456 | /* | 475 | /* |
457 | * As mperf and tsc collection are not atomic, | 476 | * mperf delta is likely a huge "positive" number |
458 | * it is possible for mperf's non-halted cycles | 477 | * can not use it for calculating c0 time |
459 | * to exceed TSC's all cycles: show c1 = 0% in that case. | ||
460 | */ | 478 | */ |
461 | if (delta->mperf > delta->tsc) | 479 | skip_c0 = 1; |
462 | delta->c1 = 0; | 480 | skip_c1 = 1; |
463 | else /* normal case, derive c1 */ | 481 | } |
464 | delta->c1 = delta->tsc - delta->mperf | ||
465 | - delta->c3 - delta->c6 - delta->c7; | ||
466 | 482 | ||
467 | if (delta->mperf == 0) | ||
468 | delta->mperf = 1; /* divide by 0 protection */ | ||
469 | 483 | ||
470 | /* | 484 | /* |
471 | * for "extra msr", just copy the latest w/o subtracting | 485 | * As mperf and tsc collection are not atomic, |
472 | */ | 486 | * it is possible for mperf's non-halted cycles |
473 | delta->extra_msr = after->extra_msr; | 487 | * to exceed TSC's all cycles: show c1 = 0% in that case. |
474 | if (errors) { | 488 | */ |
475 | fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); | 489 | if (old->mperf > old->tsc) |
476 | dump_cnt(before); | 490 | old->c1 = 0; |
477 | fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); | 491 | else { |
478 | dump_cnt(after); | 492 | /* normal case, derive c1 */ |
479 | errors = 0; | 493 | old->c1 = old->tsc - old->mperf - core_delta->c3 |
480 | } | 494 | - core_delta->c6 - core_delta->c7; |
495 | } | ||
496 | if (old->mperf == 0) { | ||
497 | if (verbose) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | ||
498 | old->mperf = 1; /* divide by 0 protection */ | ||
481 | } | 499 | } |
500 | |||
501 | /* | ||
502 | * for "extra msr", just copy the latest w/o subtracting | ||
503 | */ | ||
504 | old->extra_msr = new->extra_msr; | ||
505 | } | ||
506 | |||
507 | int delta_cpu(struct thread_data *t, struct core_data *c, | ||
508 | struct pkg_data *p, struct thread_data *t2, | ||
509 | struct core_data *c2, struct pkg_data *p2) | ||
510 | { | ||
511 | /* calculate core delta only for 1st thread in core */ | ||
512 | if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) | ||
513 | delta_core(c, c2); | ||
514 | |||
515 | /* always calculate thread delta */ | ||
516 | delta_thread(t, t2, c2); /* c2 is core delta */ | ||
517 | |||
518 | /* calculate package delta only for 1st core in package */ | ||
519 | if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) | ||
520 | delta_package(p, p2); | ||
521 | |||
482 | return 0; | 522 | return 0; |
483 | } | 523 | } |
484 | 524 | ||
485 | void compute_average(struct counters *delta, struct counters *avg) | 525 | void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
526 | { | ||
527 | t->tsc = 0; | ||
528 | t->aperf = 0; | ||
529 | t->mperf = 0; | ||
530 | t->c1 = 0; | ||
531 | |||
532 | /* tells format_counters to dump all fields from this set */ | ||
533 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
534 | |||
535 | c->c3 = 0; | ||
536 | c->c6 = 0; | ||
537 | c->c7 = 0; | ||
538 | |||
539 | p->pc2 = 0; | ||
540 | p->pc3 = 0; | ||
541 | p->pc6 = 0; | ||
542 | p->pc7 = 0; | ||
543 | } | ||
544 | int sum_counters(struct thread_data *t, struct core_data *c, | ||
545 | struct pkg_data *p) | ||
486 | { | 546 | { |
487 | struct counters *sum; | 547 | average.threads.tsc += t->tsc; |
548 | average.threads.aperf += t->aperf; | ||
549 | average.threads.mperf += t->mperf; | ||
550 | average.threads.c1 += t->c1; | ||
488 | 551 | ||
489 | sum = calloc(1, sizeof(struct counters)); | 552 | /* sum per-core values only for 1st thread in core */ |
490 | if (sum == NULL) { | 553 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
491 | perror("calloc sum"); | 554 | return 0; |
492 | exit(1); | ||
493 | } | ||
494 | 555 | ||
495 | for (; delta; delta = delta->next) { | 556 | average.cores.c3 += c->c3; |
496 | sum->tsc += delta->tsc; | 557 | average.cores.c6 += c->c6; |
497 | sum->c1 += delta->c1; | 558 | average.cores.c7 += c->c7; |
498 | sum->c3 += delta->c3; | 559 | |
499 | sum->c6 += delta->c6; | 560 | /* sum per-pkg values only for 1st core in pkg */ |
500 | sum->c7 += delta->c7; | 561 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
501 | sum->aperf += delta->aperf; | 562 | return 0; |
502 | sum->mperf += delta->mperf; | 563 | |
503 | sum->pc2 += delta->pc2; | 564 | average.packages.pc2 += p->pc2; |
504 | sum->pc3 += delta->pc3; | 565 | average.packages.pc3 += p->pc3; |
505 | sum->pc6 += delta->pc6; | 566 | average.packages.pc6 += p->pc6; |
506 | sum->pc7 += delta->pc7; | 567 | average.packages.pc7 += p->pc7; |
507 | } | 568 | |
508 | avg->tsc = sum->tsc/num_cpus; | 569 | return 0; |
509 | avg->c1 = sum->c1/num_cpus; | 570 | } |
510 | avg->c3 = sum->c3/num_cpus; | 571 | /* |
511 | avg->c6 = sum->c6/num_cpus; | 572 | * sum the counters for all cpus in the system |
512 | avg->c7 = sum->c7/num_cpus; | 573 | * compute the weighted average |
513 | avg->aperf = sum->aperf/num_cpus; | 574 | */ |
514 | avg->mperf = sum->mperf/num_cpus; | 575 | void compute_average(struct thread_data *t, struct core_data *c, |
515 | avg->pc2 = sum->pc2/num_cpus; | 576 | struct pkg_data *p) |
516 | avg->pc3 = sum->pc3/num_cpus; | 577 | { |
517 | avg->pc6 = sum->pc6/num_cpus; | 578 | clear_counters(&average.threads, &average.cores, &average.packages); |
518 | avg->pc7 = sum->pc7/num_cpus; | 579 | |
519 | 580 | for_all_cpus(sum_counters, t, c, p); | |
520 | free(sum); | 581 | |
582 | average.threads.tsc /= topo.num_cpus; | ||
583 | average.threads.aperf /= topo.num_cpus; | ||
584 | average.threads.mperf /= topo.num_cpus; | ||
585 | average.threads.c1 /= topo.num_cpus; | ||
586 | |||
587 | average.cores.c3 /= topo.num_cores; | ||
588 | average.cores.c6 /= topo.num_cores; | ||
589 | average.cores.c7 /= topo.num_cores; | ||
590 | |||
591 | average.packages.pc2 /= topo.num_packages; | ||
592 | average.packages.pc3 /= topo.num_packages; | ||
593 | average.packages.pc6 /= topo.num_packages; | ||
594 | average.packages.pc7 /= topo.num_packages; | ||
521 | } | 595 | } |
522 | 596 | ||
523 | int get_counters(struct counters *cnt) | 597 | static unsigned long long rdtsc(void) |
524 | { | 598 | { |
525 | for ( ; cnt; cnt = cnt->next) { | 599 | unsigned int low, high; |
526 | 600 | ||
527 | if (cpu_migrate(cnt->cpu)) | 601 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); |
528 | return -1; | ||
529 | 602 | ||
530 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) | 603 | return low | ((unsigned long long)high) << 32; |
531 | return -1; | 604 | } |
532 | 605 | ||
533 | if (has_aperf) { | ||
534 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) | ||
535 | return -1; | ||
536 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) | ||
537 | return -1; | ||
538 | } | ||
539 | 606 | ||
540 | if (do_nhm_cstates) { | 607 | /* |
541 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) | 608 | * get_counters(...) |
542 | return -1; | 609 | * migrate to cpu |
543 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | 610 | * acquire and record local counters for that cpu |
544 | return -1; | 611 | */ |
545 | } | 612 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
613 | { | ||
614 | int cpu = t->cpu_id; | ||
546 | 615 | ||
547 | if (do_snb_cstates) | 616 | if (cpu_migrate(cpu)) |
548 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) | 617 | return -1; |
549 | return -1; | ||
550 | 618 | ||
551 | if (do_nhm_cstates) { | 619 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
552 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | 620 | |
553 | return -1; | 621 | if (has_aperf) { |
554 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | 622 | if (get_msr(cpu, MSR_APERF, &t->aperf)) |
555 | return -1; | 623 | return -3; |
556 | } | 624 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) |
557 | if (do_snb_cstates) { | 625 | return -4; |
558 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | 626 | } |
559 | return -1; | 627 | |
560 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | 628 | if (extra_msr_offset) |
561 | return -1; | 629 | if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) |
562 | } | 630 | return -5; |
563 | if (extra_msr_offset) | 631 | |
564 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) | 632 | /* collect core counters only for 1st thread in core */ |
565 | return -1; | 633 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
634 | return 0; | ||
635 | |||
636 | if (do_nhm_cstates) { | ||
637 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | ||
638 | return -6; | ||
639 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | ||
640 | return -7; | ||
641 | } | ||
642 | |||
643 | if (do_snb_cstates) | ||
644 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | ||
645 | return -8; | ||
646 | |||
647 | /* collect package counters only for 1st core in package */ | ||
648 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
649 | return 0; | ||
650 | |||
651 | if (do_nhm_cstates) { | ||
652 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) | ||
653 | return -9; | ||
654 | if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) | ||
655 | return -10; | ||
656 | } | ||
657 | if (do_snb_cstates) { | ||
658 | if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) | ||
659 | return -11; | ||
660 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | ||
661 | return -12; | ||
566 | } | 662 | } |
567 | return 0; | 663 | return 0; |
568 | } | 664 | } |
569 | 665 | ||
570 | void print_nehalem_info(void) | 666 | void print_verbose_header(void) |
571 | { | 667 | { |
572 | unsigned long long msr; | 668 | unsigned long long msr; |
573 | unsigned int ratio; | 669 | unsigned int ratio; |
@@ -615,143 +711,82 @@ void print_nehalem_info(void) | |||
615 | 711 | ||
616 | } | 712 | } |
617 | 713 | ||
618 | void free_counter_list(struct counters *list) | 714 | void free_all_buffers(void) |
619 | { | 715 | { |
620 | struct counters *p; | 716 | CPU_FREE(cpu_present_set); |
717 | cpu_present_set = NULL; | ||
718 | cpu_present_set = 0; | ||
621 | 719 | ||
622 | for (p = list; p; ) { | 720 | CPU_FREE(cpu_affinity_set); |
623 | struct counters *free_me; | 721 | cpu_affinity_set = NULL; |
722 | cpu_affinity_setsize = 0; | ||
624 | 723 | ||
625 | free_me = p; | 724 | free(thread_even); |
626 | p = p->next; | 725 | free(core_even); |
627 | free(free_me); | 726 | free(package_even); |
628 | } | ||
629 | } | ||
630 | 727 | ||
631 | void free_all_counters(void) | 728 | thread_even = NULL; |
632 | { | 729 | core_even = NULL; |
633 | free_counter_list(cnt_even); | 730 | package_even = NULL; |
634 | cnt_even = NULL; | ||
635 | 731 | ||
636 | free_counter_list(cnt_odd); | 732 | free(thread_odd); |
637 | cnt_odd = NULL; | 733 | free(core_odd); |
734 | free(package_odd); | ||
638 | 735 | ||
639 | free_counter_list(cnt_delta); | 736 | thread_odd = NULL; |
640 | cnt_delta = NULL; | 737 | core_odd = NULL; |
738 | package_odd = NULL; | ||
641 | 739 | ||
642 | free_counter_list(cnt_average); | 740 | free(output_buffer); |
643 | cnt_average = NULL; | 741 | output_buffer = NULL; |
742 | outp = NULL; | ||
644 | } | 743 | } |
645 | 744 | ||
646 | void insert_counters(struct counters **list, | 745 | /* |
647 | struct counters *new) | 746 | * cpu_is_first_sibling_in_core(cpu) |
747 | * return 1 if given CPU is 1st HT sibling in the core | ||
748 | */ | ||
749 | int cpu_is_first_sibling_in_core(int cpu) | ||
648 | { | 750 | { |
649 | struct counters *prev; | 751 | char path[64]; |
650 | 752 | FILE *filep; | |
651 | /* | 753 | int first_cpu; |
652 | * list was empty | ||
653 | */ | ||
654 | if (*list == NULL) { | ||
655 | new->next = *list; | ||
656 | *list = new; | ||
657 | return; | ||
658 | } | ||
659 | |||
660 | if (!summary_only) | ||
661 | show_cpu = 1; /* there is more than one CPU */ | ||
662 | |||
663 | /* | ||
664 | * insert on front of list. | ||
665 | * It is sorted by ascending package#, core#, cpu# | ||
666 | */ | ||
667 | if (((*list)->pkg > new->pkg) || | ||
668 | (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || | ||
669 | (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { | ||
670 | new->next = *list; | ||
671 | *list = new; | ||
672 | return; | ||
673 | } | ||
674 | |||
675 | prev = *list; | ||
676 | |||
677 | while (prev->next && (prev->next->pkg < new->pkg)) { | ||
678 | prev = prev->next; | ||
679 | if (!summary_only) | ||
680 | show_pkg = 1; /* there is more than 1 package */ | ||
681 | } | ||
682 | |||
683 | while (prev->next && (prev->next->pkg == new->pkg) | ||
684 | && (prev->next->core < new->core)) { | ||
685 | prev = prev->next; | ||
686 | if (!summary_only) | ||
687 | show_core = 1; /* there is more than 1 core */ | ||
688 | } | ||
689 | 754 | ||
690 | while (prev->next && (prev->next->pkg == new->pkg) | 755 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); |
691 | && (prev->next->core == new->core) | 756 | filep = fopen(path, "r"); |
692 | && (prev->next->cpu < new->cpu)) { | 757 | if (filep == NULL) { |
693 | prev = prev->next; | 758 | perror(path); |
759 | exit(1); | ||
694 | } | 760 | } |
695 | 761 | fscanf(filep, "%d", &first_cpu); | |
696 | /* | 762 | fclose(filep); |
697 | * insert after "prev" | 763 | return (cpu == first_cpu); |
698 | */ | ||
699 | new->next = prev->next; | ||
700 | prev->next = new; | ||
701 | } | 764 | } |
702 | 765 | ||
703 | void alloc_new_counters(int pkg, int core, int cpu) | 766 | /* |
767 | * cpu_is_first_core_in_package(cpu) | ||
768 | * return 1 if given CPU is 1st core in package | ||
769 | */ | ||
770 | int cpu_is_first_core_in_package(int cpu) | ||
704 | { | 771 | { |
705 | struct counters *new; | 772 | char path[64]; |
706 | 773 | FILE *filep; | |
707 | if (verbose > 1) | 774 | int first_cpu; |
708 | printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); | ||
709 | |||
710 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
711 | if (new == NULL) { | ||
712 | perror("calloc"); | ||
713 | exit(1); | ||
714 | } | ||
715 | new->pkg = pkg; | ||
716 | new->core = core; | ||
717 | new->cpu = cpu; | ||
718 | insert_counters(&cnt_odd, new); | ||
719 | |||
720 | new = (struct counters *)calloc(1, | ||
721 | sizeof(struct counters)); | ||
722 | if (new == NULL) { | ||
723 | perror("calloc"); | ||
724 | exit(1); | ||
725 | } | ||
726 | new->pkg = pkg; | ||
727 | new->core = core; | ||
728 | new->cpu = cpu; | ||
729 | insert_counters(&cnt_even, new); | ||
730 | |||
731 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
732 | if (new == NULL) { | ||
733 | perror("calloc"); | ||
734 | exit(1); | ||
735 | } | ||
736 | new->pkg = pkg; | ||
737 | new->core = core; | ||
738 | new->cpu = cpu; | ||
739 | insert_counters(&cnt_delta, new); | ||
740 | 775 | ||
741 | new = (struct counters *)calloc(1, sizeof(struct counters)); | 776 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); |
742 | if (new == NULL) { | 777 | filep = fopen(path, "r"); |
743 | perror("calloc"); | 778 | if (filep == NULL) { |
779 | perror(path); | ||
744 | exit(1); | 780 | exit(1); |
745 | } | 781 | } |
746 | new->pkg = pkg; | 782 | fscanf(filep, "%d", &first_cpu); |
747 | new->core = core; | 783 | fclose(filep); |
748 | new->cpu = cpu; | 784 | return (cpu == first_cpu); |
749 | cnt_average = new; | ||
750 | } | 785 | } |
751 | 786 | ||
752 | int get_physical_package_id(int cpu) | 787 | int get_physical_package_id(int cpu) |
753 | { | 788 | { |
754 | char path[64]; | 789 | char path[80]; |
755 | FILE *filep; | 790 | FILE *filep; |
756 | int pkg; | 791 | int pkg; |
757 | 792 | ||
@@ -768,7 +803,7 @@ int get_physical_package_id(int cpu) | |||
768 | 803 | ||
769 | int get_core_id(int cpu) | 804 | int get_core_id(int cpu) |
770 | { | 805 | { |
771 | char path[64]; | 806 | char path[80]; |
772 | FILE *filep; | 807 | FILE *filep; |
773 | int core; | 808 | int core; |
774 | 809 | ||
@@ -783,14 +818,87 @@ int get_core_id(int cpu) | |||
783 | return core; | 818 | return core; |
784 | } | 819 | } |
785 | 820 | ||
821 | int get_num_ht_siblings(int cpu) | ||
822 | { | ||
823 | char path[80]; | ||
824 | FILE *filep; | ||
825 | int sib1, sib2; | ||
826 | int matches; | ||
827 | char character; | ||
828 | |||
829 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | ||
830 | filep = fopen(path, "r"); | ||
831 | if (filep == NULL) { | ||
832 | perror(path); | ||
833 | exit(1); | ||
834 | } | ||
835 | /* | ||
836 | * file format: | ||
837 | * if a pair of numbers with a character between: 2 siblings (e.g. 1-2, or 1,4) | ||
838 | * otherwise 1 sibling (self). | ||
839 | */ | ||
840 | matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); | ||
841 | |||
842 | fclose(filep); | ||
843 | |||
844 | if (matches == 3) | ||
845 | return 2; | ||
846 | else | ||
847 | return 1; | ||
848 | } | ||
849 | |||
786 | /* | 850 | /* |
787 | * run func(pkg, core, cpu) on every cpu in /proc/stat | 851 | * run func(thread, core, package) in topology order |
852 | * skip non-present cpus | ||
788 | */ | 853 | */ |
789 | 854 | ||
790 | int for_all_cpus(void (func)(int, int, int)) | 855 | int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, |
856 | struct pkg_data *, struct thread_data *, struct core_data *, | ||
857 | struct pkg_data *), struct thread_data *thread_base, | ||
858 | struct core_data *core_base, struct pkg_data *pkg_base, | ||
859 | struct thread_data *thread_base2, struct core_data *core_base2, | ||
860 | struct pkg_data *pkg_base2) | ||
861 | { | ||
862 | int retval, pkg_no, core_no, thread_no; | ||
863 | |||
864 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | ||
865 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | ||
866 | for (thread_no = 0; thread_no < | ||
867 | topo.num_threads_per_core; ++thread_no) { | ||
868 | struct thread_data *t, *t2; | ||
869 | struct core_data *c, *c2; | ||
870 | struct pkg_data *p, *p2; | ||
871 | |||
872 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | ||
873 | |||
874 | if (cpu_is_not_present(t->cpu_id)) | ||
875 | continue; | ||
876 | |||
877 | t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); | ||
878 | |||
879 | c = GET_CORE(core_base, core_no, pkg_no); | ||
880 | c2 = GET_CORE(core_base2, core_no, pkg_no); | ||
881 | |||
882 | p = GET_PKG(pkg_base, pkg_no); | ||
883 | p2 = GET_PKG(pkg_base2, pkg_no); | ||
884 | |||
885 | retval = func(t, c, p, t2, c2, p2); | ||
886 | if (retval) | ||
887 | return retval; | ||
888 | } | ||
889 | } | ||
890 | } | ||
891 | return 0; | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * run func(cpu) on every cpu in /proc/stat | ||
896 | * return max_cpu number | ||
897 | */ | ||
898 | int for_all_proc_cpus(int (func)(int)) | ||
791 | { | 899 | { |
792 | FILE *fp; | 900 | FILE *fp; |
793 | int cpu_count; | 901 | int cpu_num; |
794 | int retval; | 902 | int retval; |
795 | 903 | ||
796 | fp = fopen(proc_stat, "r"); | 904 | fp = fopen(proc_stat, "r"); |
@@ -805,78 +913,88 @@ int for_all_cpus(void (func)(int, int, int)) | |||
805 | exit(1); | 913 | exit(1); |
806 | } | 914 | } |
807 | 915 | ||
808 | for (cpu_count = 0; ; cpu_count++) { | 916 | while (1) { |
809 | int cpu; | 917 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); |
810 | |||
811 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); | ||
812 | if (retval != 1) | 918 | if (retval != 1) |
813 | break; | 919 | break; |
814 | 920 | ||
815 | func(get_physical_package_id(cpu), get_core_id(cpu), cpu); | 921 | retval = func(cpu_num); |
922 | if (retval) { | ||
923 | fclose(fp); | ||
924 | return(retval); | ||
925 | } | ||
816 | } | 926 | } |
817 | fclose(fp); | 927 | fclose(fp); |
818 | return cpu_count; | 928 | return 0; |
819 | } | 929 | } |
820 | 930 | ||
821 | void re_initialize(void) | 931 | void re_initialize(void) |
822 | { | 932 | { |
823 | free_all_counters(); | 933 | free_all_buffers(); |
824 | num_cpus = for_all_cpus(alloc_new_counters); | 934 | setup_all_buffers(); |
825 | cpu_mask_uninit(); | 935 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); |
826 | cpu_mask_init(num_cpus); | ||
827 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); | ||
828 | } | 936 | } |
829 | 937 | ||
830 | void dummy(int pkg, int core, int cpu) { return; } | 938 | |
831 | /* | 939 | /* |
832 | * check to see if a cpu came on-line | 940 | * count_cpus() |
941 | * remember the last one seen, it will be the max | ||
833 | */ | 942 | */ |
834 | int verify_num_cpus(void) | 943 | int count_cpus(int cpu) |
835 | { | 944 | { |
836 | int new_num_cpus; | 945 | if (topo.max_cpu_num < cpu) |
946 | topo.max_cpu_num = cpu; | ||
837 | 947 | ||
838 | new_num_cpus = for_all_cpus(dummy); | 948 | topo.num_cpus += 1; |
839 | 949 | return 0; | |
840 | if (new_num_cpus != num_cpus) { | 950 | } |
841 | if (verbose) | 951 | int mark_cpu_present(int cpu) |
842 | printf("num_cpus was %d, is now %d\n", | 952 | { |
843 | num_cpus, new_num_cpus); | 953 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); |
844 | return -1; | ||
845 | } | ||
846 | return 0; | 954 | return 0; |
847 | } | 955 | } |
848 | 956 | ||
849 | void turbostat_loop() | 957 | void turbostat_loop() |
850 | { | 958 | { |
959 | int retval; | ||
960 | |||
851 | restart: | 961 | restart: |
852 | get_counters(cnt_even); | 962 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
963 | if (retval) { | ||
964 | re_initialize(); | ||
965 | goto restart; | ||
966 | } | ||
853 | gettimeofday(&tv_even, (struct timezone *)NULL); | 967 | gettimeofday(&tv_even, (struct timezone *)NULL); |
854 | 968 | ||
855 | while (1) { | 969 | while (1) { |
856 | if (verify_num_cpus()) { | 970 | if (for_all_proc_cpus(cpu_is_not_present)) { |
857 | re_initialize(); | 971 | re_initialize(); |
858 | goto restart; | 972 | goto restart; |
859 | } | 973 | } |
860 | sleep(interval_sec); | 974 | sleep(interval_sec); |
861 | if (get_counters(cnt_odd)) { | 975 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
976 | if (retval) { | ||
862 | re_initialize(); | 977 | re_initialize(); |
863 | goto restart; | 978 | goto restart; |
864 | } | 979 | } |
865 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 980 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
866 | compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
867 | timersub(&tv_odd, &tv_even, &tv_delta); | 981 | timersub(&tv_odd, &tv_even, &tv_delta); |
868 | compute_average(cnt_delta, cnt_average); | 982 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
869 | print_counters(cnt_delta); | 983 | compute_average(EVEN_COUNTERS); |
984 | format_all_counters(EVEN_COUNTERS); | ||
985 | flush_stdout(); | ||
870 | sleep(interval_sec); | 986 | sleep(interval_sec); |
871 | if (get_counters(cnt_even)) { | 987 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
988 | if (retval) { | ||
872 | re_initialize(); | 989 | re_initialize(); |
873 | goto restart; | 990 | goto restart; |
874 | } | 991 | } |
875 | gettimeofday(&tv_even, (struct timezone *)NULL); | 992 | gettimeofday(&tv_even, (struct timezone *)NULL); |
876 | compute_delta(cnt_even, cnt_odd, cnt_delta); | ||
877 | timersub(&tv_even, &tv_odd, &tv_delta); | 993 | timersub(&tv_even, &tv_odd, &tv_delta); |
878 | compute_average(cnt_delta, cnt_average); | 994 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
879 | print_counters(cnt_delta); | 995 | compute_average(ODD_COUNTERS); |
996 | format_all_counters(ODD_COUNTERS); | ||
997 | flush_stdout(); | ||
880 | } | 998 | } |
881 | } | 999 | } |
882 | 1000 | ||
@@ -1051,6 +1169,208 @@ int open_dev_cpu_msr(int dummy1) | |||
1051 | return 0; | 1169 | return 0; |
1052 | } | 1170 | } |
1053 | 1171 | ||
1172 | void topology_probe() | ||
1173 | { | ||
1174 | int i; | ||
1175 | int max_core_id = 0; | ||
1176 | int max_package_id = 0; | ||
1177 | int max_siblings = 0; | ||
1178 | struct cpu_topology { | ||
1179 | int core_id; | ||
1180 | int physical_package_id; | ||
1181 | } *cpus; | ||
1182 | |||
1183 | /* Initialize num_cpus, max_cpu_num */ | ||
1184 | topo.num_cpus = 0; | ||
1185 | topo.max_cpu_num = 0; | ||
1186 | for_all_proc_cpus(count_cpus); | ||
1187 | if (!summary_only && topo.num_cpus > 1) | ||
1188 | show_cpu = 1; | ||
1189 | |||
1190 | if (verbose > 1) | ||
1191 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | ||
1192 | |||
1193 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | ||
1194 | if (cpus == NULL) { | ||
1195 | perror("calloc cpus"); | ||
1196 | exit(1); | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Allocate and initialize cpu_present_set | ||
1201 | */ | ||
1202 | cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1203 | if (cpu_present_set == NULL) { | ||
1204 | perror("CPU_ALLOC"); | ||
1205 | exit(3); | ||
1206 | } | ||
1207 | cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1208 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
1209 | for_all_proc_cpus(mark_cpu_present); | ||
1210 | |||
1211 | /* | ||
1212 | * Allocate and initialize cpu_affinity_set | ||
1213 | */ | ||
1214 | cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1215 | if (cpu_affinity_set == NULL) { | ||
1216 | perror("CPU_ALLOC"); | ||
1217 | exit(3); | ||
1218 | } | ||
1219 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1220 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); | ||
1221 | |||
1222 | |||
1223 | /* | ||
1224 | * For online cpus | ||
1225 | * find max_core_id, max_package_id | ||
1226 | */ | ||
1227 | for (i = 0; i <= topo.max_cpu_num; ++i) { | ||
1228 | int siblings; | ||
1229 | |||
1230 | if (cpu_is_not_present(i)) { | ||
1231 | if (verbose > 1) | ||
1232 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | ||
1233 | continue; | ||
1234 | } | ||
1235 | cpus[i].core_id = get_core_id(i); | ||
1236 | if (cpus[i].core_id > max_core_id) | ||
1237 | max_core_id = cpus[i].core_id; | ||
1238 | |||
1239 | cpus[i].physical_package_id = get_physical_package_id(i); | ||
1240 | if (cpus[i].physical_package_id > max_package_id) | ||
1241 | max_package_id = cpus[i].physical_package_id; | ||
1242 | |||
1243 | siblings = get_num_ht_siblings(i); | ||
1244 | if (siblings > max_siblings) | ||
1245 | max_siblings = siblings; | ||
1246 | if (verbose > 1) | ||
1247 | fprintf(stderr, "cpu %d pkg %d core %d\n", | ||
1248 | i, cpus[i].physical_package_id, cpus[i].core_id); | ||
1249 | } | ||
1250 | topo.num_cores_per_pkg = max_core_id + 1; | ||
1251 | if (verbose > 1) | ||
1252 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | ||
1253 | max_core_id, topo.num_cores_per_pkg); | ||
1254 | if (!summary_only && topo.num_cores_per_pkg > 1) | ||
1255 | show_core = 1; | ||
1256 | |||
1257 | topo.num_packages = max_package_id + 1; | ||
1258 | if (verbose > 1) | ||
1259 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | ||
1260 | max_package_id, topo.num_packages); | ||
1261 | if (!summary_only && topo.num_packages > 1) | ||
1262 | show_pkg = 1; | ||
1263 | |||
1264 | topo.num_threads_per_core = max_siblings; | ||
1265 | if (verbose > 1) | ||
1266 | fprintf(stderr, "max_siblings %d\n", max_siblings); | ||
1267 | |||
1268 | free(cpus); | ||
1269 | } | ||
1270 | |||
1271 | void | ||
1272 | allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) | ||
1273 | { | ||
1274 | int i; | ||
1275 | |||
1276 | *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * | ||
1277 | topo.num_packages, sizeof(struct thread_data)); | ||
1278 | if (*t == NULL) | ||
1279 | goto error; | ||
1280 | |||
1281 | for (i = 0; i < topo.num_threads_per_core * | ||
1282 | topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1283 | (*t)[i].cpu_id = -1; | ||
1284 | |||
1285 | *c = calloc(topo.num_cores_per_pkg * topo.num_packages, | ||
1286 | sizeof(struct core_data)); | ||
1287 | if (*c == NULL) | ||
1288 | goto error; | ||
1289 | |||
1290 | for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1291 | (*c)[i].core_id = -1; | ||
1292 | |||
1293 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); | ||
1294 | if (*p == NULL) | ||
1295 | goto error; | ||
1296 | |||
1297 | for (i = 0; i < topo.num_packages; i++) | ||
1298 | (*p)[i].package_id = i; | ||
1299 | |||
1300 | return; | ||
1301 | error: | ||
1302 | perror("calloc counters"); | ||
1303 | exit(1); | ||
1304 | } | ||
1305 | /* | ||
1306 | * init_counter() | ||
1307 | * | ||
1308 | * set cpu_id, core_num, pkg_num | ||
1309 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE | ||
1310 | * | ||
1311 | * increment topo.num_cores when 1st core in pkg seen | ||
1312 | */ | ||
1313 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, | ||
1314 | struct pkg_data *pkg_base, int thread_num, int core_num, | ||
1315 | int pkg_num, int cpu_id) | ||
1316 | { | ||
1317 | struct thread_data *t; | ||
1318 | struct core_data *c; | ||
1319 | struct pkg_data *p; | ||
1320 | |||
1321 | t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); | ||
1322 | c = GET_CORE(core_base, core_num, pkg_num); | ||
1323 | p = GET_PKG(pkg_base, pkg_num); | ||
1324 | |||
1325 | t->cpu_id = cpu_id; | ||
1326 | if (thread_num == 0) { | ||
1327 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; | ||
1328 | if (cpu_is_first_core_in_package(cpu_id)) | ||
1329 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
1330 | } | ||
1331 | |||
1332 | c->core_id = core_num; | ||
1333 | p->package_id = pkg_num; | ||
1334 | } | ||
1335 | |||
1336 | |||
1337 | int initialize_counters(int cpu_id) | ||
1338 | { | ||
1339 | int my_thread_id, my_core_id, my_package_id; | ||
1340 | |||
1341 | my_package_id = get_physical_package_id(cpu_id); | ||
1342 | my_core_id = get_core_id(cpu_id); | ||
1343 | |||
1344 | if (cpu_is_first_sibling_in_core(cpu_id)) { | ||
1345 | my_thread_id = 0; | ||
1346 | topo.num_cores++; | ||
1347 | } else { | ||
1348 | my_thread_id = 1; | ||
1349 | } | ||
1350 | |||
1351 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1352 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1353 | return 0; | ||
1354 | } | ||
1355 | |||
1356 | void allocate_output_buffer() | ||
1357 | { | ||
1358 | output_buffer = calloc(1, (1 + topo.num_cpus) * 128); | ||
1359 | outp = output_buffer; | ||
1360 | if (outp == NULL) { | ||
1361 | perror("calloc"); | ||
1362 | exit(-1); | ||
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | void setup_all_buffers(void) | ||
1367 | { | ||
1368 | topology_probe(); | ||
1369 | allocate_counters(&thread_even, &core_even, &package_even); | ||
1370 | allocate_counters(&thread_odd, &core_odd, &package_odd); | ||
1371 | allocate_output_buffer(); | ||
1372 | for_all_proc_cpus(initialize_counters); | ||
1373 | } | ||
1054 | void turbostat_init() | 1374 | void turbostat_init() |
1055 | { | 1375 | { |
1056 | check_cpuid(); | 1376 | check_cpuid(); |
@@ -1058,21 +1378,19 @@ void turbostat_init() | |||
1058 | check_dev_msr(); | 1378 | check_dev_msr(); |
1059 | check_super_user(); | 1379 | check_super_user(); |
1060 | 1380 | ||
1061 | num_cpus = for_all_cpus(alloc_new_counters); | 1381 | setup_all_buffers(); |
1062 | cpu_mask_init(num_cpus); | ||
1063 | 1382 | ||
1064 | if (verbose) | 1383 | if (verbose) |
1065 | print_nehalem_info(); | 1384 | print_verbose_header(); |
1066 | } | 1385 | } |
1067 | 1386 | ||
1068 | int fork_it(char **argv) | 1387 | int fork_it(char **argv) |
1069 | { | 1388 | { |
1070 | int retval; | ||
1071 | pid_t child_pid; | 1389 | pid_t child_pid; |
1072 | get_counters(cnt_even); | ||
1073 | 1390 | ||
1074 | /* clear affinity side-effect of get_counters() */ | 1391 | for_all_cpus(get_counters, EVEN_COUNTERS); |
1075 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | 1392 | /* clear affinity side-effect of get_counters() */ |
1393 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | ||
1076 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1394 | gettimeofday(&tv_even, (struct timezone *)NULL); |
1077 | 1395 | ||
1078 | child_pid = fork(); | 1396 | child_pid = fork(); |
@@ -1095,14 +1413,17 @@ int fork_it(char **argv) | |||
1095 | exit(1); | 1413 | exit(1); |
1096 | } | 1414 | } |
1097 | } | 1415 | } |
1098 | get_counters(cnt_odd); | 1416 | /* |
1417 | * n.b. fork_it() does not check for errors from for_all_cpus() | ||
1418 | * because re-starting is problematic when forking | ||
1419 | */ | ||
1420 | for_all_cpus(get_counters, ODD_COUNTERS); | ||
1099 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 1421 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
1100 | retval = compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
1101 | |||
1102 | timersub(&tv_odd, &tv_even, &tv_delta); | 1422 | timersub(&tv_odd, &tv_even, &tv_delta); |
1103 | compute_average(cnt_delta, cnt_average); | 1423 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
1104 | if (!retval) | 1424 | compute_average(EVEN_COUNTERS); |
1105 | print_counters(cnt_delta); | 1425 | format_all_counters(EVEN_COUNTERS); |
1426 | flush_stderr(); | ||
1106 | 1427 | ||
1107 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 1428 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
1108 | 1429 | ||
@@ -1115,8 +1436,14 @@ void cmdline(int argc, char **argv) | |||
1115 | 1436 | ||
1116 | progname = argv[0]; | 1437 | progname = argv[0]; |
1117 | 1438 | ||
1118 | while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { | 1439 | while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { |
1119 | switch (opt) { | 1440 | switch (opt) { |
1441 | case 'c': | ||
1442 | show_core_only++; | ||
1443 | break; | ||
1444 | case 'p': | ||
1445 | show_pkg_only++; | ||
1446 | break; | ||
1120 | case 's': | 1447 | case 's': |
1121 | summary_only++; | 1448 | summary_only++; |
1122 | break; | 1449 | break; |
@@ -1142,10 +1469,8 @@ int main(int argc, char **argv) | |||
1142 | cmdline(argc, argv); | 1469 | cmdline(argc, argv); |
1143 | 1470 | ||
1144 | if (verbose > 1) | 1471 | if (verbose > 1) |
1145 | fprintf(stderr, "turbostat Dec 6, 2010" | 1472 | fprintf(stderr, "turbostat v2.0 May 16, 2012" |
1146 | " - Len Brown <lenb@kernel.org>\n"); | 1473 | " - Len Brown <lenb@kernel.org>\n"); |
1147 | if (verbose > 1) | ||
1148 | fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); | ||
1149 | 1474 | ||
1150 | turbostat_init(); | 1475 | turbostat_init(); |
1151 | 1476 | ||