diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 1 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 77 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1333 |
3 files changed, 872 insertions, 539 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index fd8e1f1297aa..f85649554191 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | turbostat : turbostat.c | 1 | turbostat : turbostat.c |
2 | CFLAGS += -Wall | ||
2 | 3 | ||
3 | clean : | 4 | clean : |
4 | rm -f turbostat | 5 | rm -f turbostat |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index adf175f61496..74e44507dfe9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -27,7 +27,11 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. | |||
27 | on processors that additionally support C-state residency counters. | 27 | on processors that additionally support C-state residency counters. |
28 | 28 | ||
29 | .SS Options | 29 | .SS Options |
30 | The \fB-s\fP option prints only a 1-line summary for each sample interval. | 30 | The \fB-s\fP option limits output to a 1-line system summary for each interval. |
31 | .PP | ||
32 | The \fB-c\fP option limits output to the 1st thread in each core. | ||
33 | .PP | ||
34 | The \fB-p\fP option limits output to the 1st thread in each package. | ||
31 | .PP | 35 | .PP |
32 | The \fB-v\fP option increases verbosity. | 36 | The \fB-v\fP option increases verbosity. |
33 | .PP | 37 | .PP |
@@ -65,19 +69,19 @@ Subsequent rows show per-CPU statistics. | |||
65 | .nf | 69 | .nf |
66 | [root@x980]# ./turbostat | 70 | [root@x980]# ./turbostat |
67 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 71 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
68 | 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 | 72 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 |
69 | 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 | 73 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 |
70 | 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 | 74 | 0 6 0.05 1.62 3.38 10.34 |
71 | 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 | 75 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 |
72 | 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 | 76 | 1 8 0.03 1.62 3.38 0.06 |
73 | 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 | 77 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 |
74 | 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 | 78 | 2 10 0.02 1.62 3.38 0.29 |
75 | 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 | 79 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 |
76 | 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 | 80 | 8 7 0.01 1.62 3.38 0.06 |
77 | 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 | 81 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 |
78 | 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 | 82 | 9 9 0.02 1.62 3.38 0.60 |
79 | 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 | 83 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 |
80 | 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 | 84 | 10 11 0.02 1.62 3.38 0.02 |
81 | .fi | 85 | .fi |
82 | .SH SUMMARY EXAMPLE | 86 | .SH SUMMARY EXAMPLE |
83 | The "-s" option prints the column headers just once, | 87 | The "-s" option prints the column headers just once, |
@@ -86,9 +90,10 @@ and then the one line system summary for each sample interval. | |||
86 | .nf | 90 | .nf |
87 | [root@x980]# ./turbostat -s | 91 | [root@x980]# ./turbostat -s |
88 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 92 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
89 | 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 | 93 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 |
90 | 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 | 94 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 |
91 | 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 | 95 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 |
96 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | ||
92 | .fi | 97 | .fi |
93 | .SH VERBOSE EXAMPLE | 98 | .SH VERBOSE EXAMPLE |
94 | The "-v" option adds verbosity to the output: | 99 | The "-v" option adds verbosity to the output: |
@@ -120,30 +125,28 @@ until ^C while the other CPUs are mostly idle: | |||
120 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null | 125 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null |
121 | ^C | 126 | ^C |
122 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 127 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
123 | 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 | 128 | 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00 |
124 | 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 | 129 | 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00 |
125 | 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 | 130 | 0 6 0.21 3.06 3.38 18.09 |
126 | 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 | 131 | 1 2 0.53 3.33 3.38 2.80 46.40 50.27 |
127 | 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 | 132 | 1 8 0.89 3.47 3.38 2.44 |
128 | 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 | 133 | 2 4 1.36 3.43 3.38 9.04 23.71 65.89 |
129 | 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 | 134 | 2 10 0.18 2.86 3.38 10.22 |
130 | 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 | 135 | 8 1 0.04 2.87 3.38 99.96 0.01 0.00 |
131 | 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 | 136 | 8 7 99.72 3.63 3.38 0.27 |
132 | 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 | 137 | 9 3 0.31 3.21 3.38 7.64 56.55 35.50 |
133 | 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 | 138 | 9 9 0.08 2.95 3.38 7.88 |
134 | 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 | 139 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
135 | 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 | 140 | 10 11 0.16 2.88 3.38 3.40 |
136 | 4.907015 sec | ||
137 | |||
138 | .fi | 141 | .fi |
139 | Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit | 142 | Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit |
140 | while the other processors are generally in various states of idle. | 143 | while the other processors are generally in various states of idle. |
141 | 144 | ||
142 | Note that cpu0 is an HT sibling sharing core0 | 145 | Note that cpu1 and cpu7 are HT siblings within core8. |
143 | with cpu6, and thus it is unable to get to an idle state | 146 | As cpu7 is very busy, it prevents its sibling, cpu1, |
144 | deeper than c1 while cpu6 is busy. | 147 | from entering a c-state deeper than c1. |
145 | 148 | ||
146 | Note that turbostat reports average GHz of 3.64, while | 149 | Note that turbostat reports average GHz of 3.63, while |
147 | the arithmetic average of the GHz column above is lower. | 150 | the arithmetic average of the GHz column above is lower. |
148 | This is a weighted average, where the weight is %c0. ie. it is the total number of | 151 | This is a weighted average, where the weight is %c0. ie. it is the total number of |
149 | un-halted cycles elapsed per time divided by the number of CPUs. | 152 | un-halted cycles elapsed per time divided by the number of CPUs. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 16de7ad4850f..861d77190206 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -67,92 +67,119 @@ double bclk; | |||
67 | unsigned int show_pkg; | 67 | unsigned int show_pkg; |
68 | unsigned int show_core; | 68 | unsigned int show_core; |
69 | unsigned int show_cpu; | 69 | unsigned int show_cpu; |
70 | unsigned int show_pkg_only; | ||
71 | unsigned int show_core_only; | ||
72 | char *output_buffer, *outp; | ||
70 | 73 | ||
71 | int aperf_mperf_unstable; | 74 | int aperf_mperf_unstable; |
72 | int backwards_count; | 75 | int backwards_count; |
73 | char *progname; | 76 | char *progname; |
74 | 77 | ||
75 | int num_cpus; | 78 | cpu_set_t *cpu_present_set, *cpu_affinity_set; |
76 | cpu_set_t *cpu_present_set, *cpu_mask; | 79 | size_t cpu_present_setsize, cpu_affinity_setsize; |
77 | size_t cpu_present_setsize, cpu_mask_size; | 80 | |
78 | 81 | struct thread_data { | |
79 | struct counters { | 82 | unsigned long long tsc; |
80 | unsigned long long tsc; /* per thread */ | 83 | unsigned long long aperf; |
81 | unsigned long long aperf; /* per thread */ | 84 | unsigned long long mperf; |
82 | unsigned long long mperf; /* per thread */ | 85 | unsigned long long c1; /* derived */ |
83 | unsigned long long c1; /* per thread (calculated) */ | 86 | unsigned long long extra_msr; |
84 | unsigned long long c3; /* per core */ | 87 | unsigned int cpu_id; |
85 | unsigned long long c6; /* per core */ | 88 | unsigned int flags; |
86 | unsigned long long c7; /* per core */ | 89 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 |
87 | unsigned long long pc2; /* per package */ | 90 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 |
88 | unsigned long long pc3; /* per package */ | 91 | } *thread_even, *thread_odd; |
89 | unsigned long long pc6; /* per package */ | 92 | |
90 | unsigned long long pc7; /* per package */ | 93 | struct core_data { |
91 | unsigned long long extra_msr; /* per thread */ | 94 | unsigned long long c3; |
92 | int pkg; | 95 | unsigned long long c6; |
93 | int core; | 96 | unsigned long long c7; |
94 | int cpu; | 97 | unsigned int core_id; |
95 | struct counters *next; | 98 | } *core_even, *core_odd; |
96 | }; | 99 | |
97 | 100 | struct pkg_data { | |
98 | struct counters *cnt_even; | 101 | unsigned long long pc2; |
99 | struct counters *cnt_odd; | 102 | unsigned long long pc3; |
100 | struct counters *cnt_delta; | 103 | unsigned long long pc6; |
101 | struct counters *cnt_average; | 104 | unsigned long long pc7; |
102 | struct timeval tv_even; | 105 | unsigned int package_id; |
103 | struct timeval tv_odd; | 106 | } *package_even, *package_odd; |
104 | struct timeval tv_delta; | 107 | |
105 | 108 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | |
106 | int mark_cpu_present(int pkg, int core, int cpu) | 109 | #define EVEN_COUNTERS thread_even, core_even, package_even |
110 | |||
111 | #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ | ||
112 | (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ | ||
113 | topo.num_threads_per_core + \ | ||
114 | (core_no) * topo.num_threads_per_core + (thread_no)) | ||
115 | #define GET_CORE(core_base, core_no, pkg_no) \ | ||
116 | (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) | ||
117 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) | ||
118 | |||
119 | struct system_summary { | ||
120 | struct thread_data threads; | ||
121 | struct core_data cores; | ||
122 | struct pkg_data packages; | ||
123 | } sum, average; | ||
124 | |||
125 | |||
126 | struct topo_params { | ||
127 | int num_packages; | ||
128 | int num_cpus; | ||
129 | int num_cores; | ||
130 | int max_cpu_num; | ||
131 | int num_cores_per_pkg; | ||
132 | int num_threads_per_core; | ||
133 | } topo; | ||
134 | |||
135 | struct timeval tv_even, tv_odd, tv_delta; | ||
136 | |||
137 | void setup_all_buffers(void); | ||
138 | |||
139 | int cpu_is_not_present(int cpu) | ||
107 | { | 140 | { |
108 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); | 141 | return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); |
109 | return 0; | ||
110 | } | 142 | } |
111 | |||
112 | /* | 143 | /* |
113 | * cpu_mask_init(ncpus) | 144 | * run func(thread, core, package) in topology order |
114 | * | 145 | * skip non-present cpus |
115 | * allocate and clear cpu_mask | ||
116 | * set cpu_mask_size | ||
117 | */ | 146 | */ |
118 | void cpu_mask_init(int ncpus) | 147 | |
148 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), | ||
149 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) | ||
119 | { | 150 | { |
120 | cpu_mask = CPU_ALLOC(ncpus); | 151 | int retval, pkg_no, core_no, thread_no; |
121 | if (cpu_mask == NULL) { | ||
122 | perror("CPU_ALLOC"); | ||
123 | exit(3); | ||
124 | } | ||
125 | cpu_mask_size = CPU_ALLOC_SIZE(ncpus); | ||
126 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
127 | 152 | ||
128 | /* | 153 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
129 | * Allocate and initialize cpu_present_set | 154 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { |
130 | */ | 155 | for (thread_no = 0; thread_no < |
131 | cpu_present_set = CPU_ALLOC(ncpus); | 156 | topo.num_threads_per_core; ++thread_no) { |
132 | if (cpu_present_set == NULL) { | 157 | struct thread_data *t; |
133 | perror("CPU_ALLOC"); | 158 | struct core_data *c; |
134 | exit(3); | 159 | struct pkg_data *p; |
135 | } | ||
136 | cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); | ||
137 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
138 | for_all_cpus(mark_cpu_present); | ||
139 | } | ||
140 | 160 | ||
141 | void cpu_mask_uninit() | 161 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); |
142 | { | 162 | |
143 | CPU_FREE(cpu_mask); | 163 | if (cpu_is_not_present(t->cpu_id)) |
144 | cpu_mask = NULL; | 164 | continue; |
145 | cpu_mask_size = 0; | 165 | |
146 | CPU_FREE(cpu_present_set); | 166 | c = GET_CORE(core_base, core_no, pkg_no); |
147 | cpu_present_set = NULL; | 167 | p = GET_PKG(pkg_base, pkg_no); |
148 | cpu_present_setsize = 0; | 168 | |
169 | retval = func(t, c, p); | ||
170 | if (retval) | ||
171 | return retval; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | return 0; | ||
149 | } | 176 | } |
150 | 177 | ||
151 | int cpu_migrate(int cpu) | 178 | int cpu_migrate(int cpu) |
152 | { | 179 | { |
153 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | 180 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); |
154 | CPU_SET_S(cpu, cpu_mask_size, cpu_mask); | 181 | CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); |
155 | if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) | 182 | if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) |
156 | return -1; | 183 | return -1; |
157 | else | 184 | else |
158 | return 0; | 185 | return 0; |
@@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
181 | void print_header(void) | 208 | void print_header(void) |
182 | { | 209 | { |
183 | if (show_pkg) | 210 | if (show_pkg) |
184 | fprintf(stderr, "pk"); | 211 | outp += sprintf(outp, "pk"); |
185 | if (show_pkg) | 212 | if (show_pkg) |
186 | fprintf(stderr, " "); | 213 | outp += sprintf(outp, " "); |
187 | if (show_core) | 214 | if (show_core) |
188 | fprintf(stderr, "cor"); | 215 | outp += sprintf(outp, "cor"); |
189 | if (show_cpu) | 216 | if (show_cpu) |
190 | fprintf(stderr, " CPU"); | 217 | outp += sprintf(outp, " CPU"); |
191 | if (show_pkg || show_core || show_cpu) | 218 | if (show_pkg || show_core || show_cpu) |
192 | fprintf(stderr, " "); | 219 | outp += sprintf(outp, " "); |
193 | if (do_nhm_cstates) | 220 | if (do_nhm_cstates) |
194 | fprintf(stderr, " %%c0"); | 221 | outp += sprintf(outp, " %%c0"); |
195 | if (has_aperf) | 222 | if (has_aperf) |
196 | fprintf(stderr, " GHz"); | 223 | outp += sprintf(outp, " GHz"); |
197 | fprintf(stderr, " TSC"); | 224 | outp += sprintf(outp, " TSC"); |
198 | if (do_nhm_cstates) | 225 | if (do_nhm_cstates) |
199 | fprintf(stderr, " %%c1"); | 226 | outp += sprintf(outp, " %%c1"); |
200 | if (do_nhm_cstates) | 227 | if (do_nhm_cstates) |
201 | fprintf(stderr, " %%c3"); | 228 | outp += sprintf(outp, " %%c3"); |
202 | if (do_nhm_cstates) | 229 | if (do_nhm_cstates) |
203 | fprintf(stderr, " %%c6"); | 230 | outp += sprintf(outp, " %%c6"); |
204 | if (do_snb_cstates) | 231 | if (do_snb_cstates) |
205 | fprintf(stderr, " %%c7"); | 232 | outp += sprintf(outp, " %%c7"); |
206 | if (do_snb_cstates) | 233 | if (do_snb_cstates) |
207 | fprintf(stderr, " %%pc2"); | 234 | outp += sprintf(outp, " %%pc2"); |
208 | if (do_nhm_cstates) | 235 | if (do_nhm_cstates) |
209 | fprintf(stderr, " %%pc3"); | 236 | outp += sprintf(outp, " %%pc3"); |
210 | if (do_nhm_cstates) | 237 | if (do_nhm_cstates) |
211 | fprintf(stderr, " %%pc6"); | 238 | outp += sprintf(outp, " %%pc6"); |
212 | if (do_snb_cstates) | 239 | if (do_snb_cstates) |
213 | fprintf(stderr, " %%pc7"); | 240 | outp += sprintf(outp, " %%pc7"); |
214 | if (extra_msr_offset) | 241 | if (extra_msr_offset) |
215 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); | 242 | outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); |
216 | 243 | ||
217 | putc('\n', stderr); | 244 | outp += sprintf(outp, "\n"); |
218 | } | 245 | } |
219 | 246 | ||
220 | void dump_cnt(struct counters *cnt) | 247 | int dump_counters(struct thread_data *t, struct core_data *c, |
248 | struct pkg_data *p) | ||
221 | { | 249 | { |
222 | if (!cnt) | 250 | fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); |
223 | return; | 251 | |
224 | if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); | 252 | if (t) { |
225 | if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); | 253 | fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); |
226 | if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); | 254 | fprintf(stderr, "TSC: %016llX\n", t->tsc); |
227 | if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); | 255 | fprintf(stderr, "aperf: %016llX\n", t->aperf); |
228 | if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); | 256 | fprintf(stderr, "mperf: %016llX\n", t->mperf); |
229 | if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); | 257 | fprintf(stderr, "c1: %016llX\n", t->c1); |
230 | if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); | 258 | fprintf(stderr, "msr0x%x: %016llX\n", |
231 | if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); | 259 | extra_msr_offset, t->extra_msr); |
232 | if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); | 260 | } |
233 | if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3); | ||
234 | if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6); | ||
235 | if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7); | ||
236 | if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); | ||
237 | } | ||
238 | 261 | ||
239 | void dump_list(struct counters *cnt) | 262 | if (c) { |
240 | { | 263 | fprintf(stderr, "core: %d\n", c->core_id); |
241 | printf("dump_list 0x%p\n", cnt); | 264 | fprintf(stderr, "c3: %016llX\n", c->c3); |
265 | fprintf(stderr, "c6: %016llX\n", c->c6); | ||
266 | fprintf(stderr, "c7: %016llX\n", c->c7); | ||
267 | } | ||
242 | 268 | ||
243 | for (; cnt; cnt = cnt->next) | 269 | if (p) { |
244 | dump_cnt(cnt); | 270 | fprintf(stderr, "package: %d\n", p->package_id); |
271 | fprintf(stderr, "pc2: %016llX\n", p->pc2); | ||
272 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | ||
273 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | ||
274 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | ||
275 | } | ||
276 | return 0; | ||
245 | } | 277 | } |
246 | 278 | ||
247 | /* | 279 | /* |
@@ -253,321 +285,389 @@ void dump_list(struct counters *cnt) | |||
253 | * TSC: "TSC" 3 columns %3.2 | 285 | * TSC: "TSC" 3 columns %3.2 |
254 | * percentage " %pc3" %6.2 | 286 | * percentage " %pc3" %6.2 |
255 | */ | 287 | */ |
256 | void print_cnt(struct counters *p) | 288 | int format_counters(struct thread_data *t, struct core_data *c, |
289 | struct pkg_data *p) | ||
257 | { | 290 | { |
258 | double interval_float; | 291 | double interval_float; |
259 | 292 | ||
293 | /* if showing only 1st thread in core and this isn't one, bail out */ | ||
294 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
295 | return 0; | ||
296 | |||
297 | /* if showing only 1st thread in pkg and this isn't one, bail out */ | ||
298 | if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
299 | return 0; | ||
300 | |||
260 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; | 301 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; |
261 | 302 | ||
262 | /* topology columns, print blanks on 1st (average) line */ | 303 | /* topo columns, print blanks on 1st (average) line */ |
263 | if (p == cnt_average) { | 304 | if (t == &average.threads) { |
264 | if (show_pkg) | 305 | if (show_pkg) |
265 | fprintf(stderr, " "); | 306 | outp += sprintf(outp, " "); |
266 | if (show_pkg && show_core) | 307 | if (show_pkg && show_core) |
267 | fprintf(stderr, " "); | 308 | outp += sprintf(outp, " "); |
268 | if (show_core) | 309 | if (show_core) |
269 | fprintf(stderr, " "); | 310 | outp += sprintf(outp, " "); |
270 | if (show_cpu) | 311 | if (show_cpu) |
271 | fprintf(stderr, " " " "); | 312 | outp += sprintf(outp, " " " "); |
272 | } else { | 313 | } else { |
273 | if (show_pkg) | 314 | if (show_pkg) { |
274 | fprintf(stderr, "%2d", p->pkg); | 315 | if (p) |
316 | outp += sprintf(outp, "%2d", p->package_id); | ||
317 | else | ||
318 | outp += sprintf(outp, " "); | ||
319 | } | ||
275 | if (show_pkg && show_core) | 320 | if (show_pkg && show_core) |
276 | fprintf(stderr, " "); | 321 | outp += sprintf(outp, " "); |
277 | if (show_core) | 322 | if (show_core) { |
278 | fprintf(stderr, "%3d", p->core); | 323 | if (c) |
324 | outp += sprintf(outp, "%3d", c->core_id); | ||
325 | else | ||
326 | outp += sprintf(outp, " "); | ||
327 | } | ||
279 | if (show_cpu) | 328 | if (show_cpu) |
280 | fprintf(stderr, " %3d", p->cpu); | 329 | outp += sprintf(outp, " %3d", t->cpu_id); |
281 | } | 330 | } |
282 | 331 | ||
283 | /* %c0 */ | 332 | /* %c0 */ |
284 | if (do_nhm_cstates) { | 333 | if (do_nhm_cstates) { |
285 | if (show_pkg || show_core || show_cpu) | 334 | if (show_pkg || show_core || show_cpu) |
286 | fprintf(stderr, " "); | 335 | outp += sprintf(outp, " "); |
287 | if (!skip_c0) | 336 | if (!skip_c0) |
288 | fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); | 337 | outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); |
289 | else | 338 | else |
290 | fprintf(stderr, " ****"); | 339 | outp += sprintf(outp, " ****"); |
291 | } | 340 | } |
292 | 341 | ||
293 | /* GHz */ | 342 | /* GHz */ |
294 | if (has_aperf) { | 343 | if (has_aperf) { |
295 | if (!aperf_mperf_unstable) { | 344 | if (!aperf_mperf_unstable) { |
296 | fprintf(stderr, " %3.2f", | 345 | outp += sprintf(outp, " %3.2f", |
297 | 1.0 * p->tsc / units * p->aperf / | 346 | 1.0 * t->tsc / units * t->aperf / |
298 | p->mperf / interval_float); | 347 | t->mperf / interval_float); |
299 | } else { | 348 | } else { |
300 | if (p->aperf > p->tsc || p->mperf > p->tsc) { | 349 | if (t->aperf > t->tsc || t->mperf > t->tsc) { |
301 | fprintf(stderr, " ***"); | 350 | outp += sprintf(outp, " ***"); |
302 | } else { | 351 | } else { |
303 | fprintf(stderr, "%3.1f*", | 352 | outp += sprintf(outp, "%3.1f*", |
304 | 1.0 * p->tsc / | 353 | 1.0 * t->tsc / |
305 | units * p->aperf / | 354 | units * t->aperf / |
306 | p->mperf / interval_float); | 355 | t->mperf / interval_float); |
307 | } | 356 | } |
308 | } | 357 | } |
309 | } | 358 | } |
310 | 359 | ||
311 | /* TSC */ | 360 | /* TSC */ |
312 | fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); | 361 | outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); |
313 | 362 | ||
314 | if (do_nhm_cstates) { | 363 | if (do_nhm_cstates) { |
315 | if (!skip_c1) | 364 | if (!skip_c1) |
316 | fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); | 365 | outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); |
317 | else | 366 | else |
318 | fprintf(stderr, " ****"); | 367 | outp += sprintf(outp, " ****"); |
319 | } | 368 | } |
369 | |||
370 | /* print per-core data only for 1st thread in core */ | ||
371 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
372 | goto done; | ||
373 | |||
320 | if (do_nhm_cstates) | 374 | if (do_nhm_cstates) |
321 | fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); | 375 | outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); |
322 | if (do_nhm_cstates) | 376 | if (do_nhm_cstates) |
323 | fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); | 377 | outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); |
324 | if (do_snb_cstates) | 378 | if (do_snb_cstates) |
325 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); | 379 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
380 | |||
381 | /* print per-package data only for 1st core in package */ | ||
382 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
383 | goto done; | ||
384 | |||
326 | if (do_snb_cstates) | 385 | if (do_snb_cstates) |
327 | fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); | 386 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
328 | if (do_nhm_cstates) | 387 | if (do_nhm_cstates) |
329 | fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); | 388 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); |
330 | if (do_nhm_cstates) | 389 | if (do_nhm_cstates) |
331 | fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); | 390 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
332 | if (do_snb_cstates) | 391 | if (do_snb_cstates) |
333 | fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); | 392 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
393 | done: | ||
334 | if (extra_msr_offset) | 394 | if (extra_msr_offset) |
335 | fprintf(stderr, " 0x%016llx", p->extra_msr); | 395 | outp += sprintf(outp, " 0x%016llx", t->extra_msr); |
336 | putc('\n', stderr); | 396 | outp += sprintf(outp, "\n"); |
397 | |||
398 | return 0; | ||
337 | } | 399 | } |
338 | 400 | ||
339 | void print_counters(struct counters *counters) | 401 | void flush_stdout() |
402 | { | ||
403 | fputs(output_buffer, stdout); | ||
404 | outp = output_buffer; | ||
405 | } | ||
406 | void flush_stderr() | ||
407 | { | ||
408 | fputs(output_buffer, stderr); | ||
409 | outp = output_buffer; | ||
410 | } | ||
411 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
340 | { | 412 | { |
341 | struct counters *cnt; | ||
342 | static int printed; | 413 | static int printed; |
343 | 414 | ||
344 | |||
345 | if (!printed || !summary_only) | 415 | if (!printed || !summary_only) |
346 | print_header(); | 416 | print_header(); |
347 | 417 | ||
348 | if (num_cpus > 1) | 418 | if (topo.num_cpus > 1) |
349 | print_cnt(cnt_average); | 419 | format_counters(&average.threads, &average.cores, |
420 | &average.packages); | ||
350 | 421 | ||
351 | printed = 1; | 422 | printed = 1; |
352 | 423 | ||
353 | if (summary_only) | 424 | if (summary_only) |
354 | return; | 425 | return; |
355 | 426 | ||
356 | for (cnt = counters; cnt != NULL; cnt = cnt->next) | 427 | for_all_cpus(format_counters, t, c, p); |
357 | print_cnt(cnt); | ||
358 | |||
359 | } | 428 | } |
360 | 429 | ||
361 | #define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) | 430 | void |
431 | delta_package(struct pkg_data *new, struct pkg_data *old) | ||
432 | { | ||
433 | old->pc2 = new->pc2 - old->pc2; | ||
434 | old->pc3 = new->pc3 - old->pc3; | ||
435 | old->pc6 = new->pc6 - old->pc6; | ||
436 | old->pc7 = new->pc7 - old->pc7; | ||
437 | } | ||
362 | 438 | ||
363 | int compute_delta(struct counters *after, | 439 | void |
364 | struct counters *before, struct counters *delta) | 440 | delta_core(struct core_data *new, struct core_data *old) |
365 | { | 441 | { |
366 | int errors = 0; | 442 | old->c3 = new->c3 - old->c3; |
367 | int perf_err = 0; | 443 | old->c6 = new->c6 - old->c6; |
444 | old->c7 = new->c7 - old->c7; | ||
445 | } | ||
368 | 446 | ||
369 | skip_c0 = skip_c1 = 0; | 447 | /* |
448 | * old = new - old | ||
449 | */ | ||
450 | void | ||
451 | delta_thread(struct thread_data *new, struct thread_data *old, | ||
452 | struct core_data *core_delta) | ||
453 | { | ||
454 | old->tsc = new->tsc - old->tsc; | ||
455 | |||
456 | /* check for TSC < 1 Mcycles over interval */ | ||
457 | if (old->tsc < (1000 * 1000)) { | ||
458 | fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); | ||
459 | fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); | ||
460 | fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); | ||
461 | exit(-3); | ||
462 | } | ||
370 | 463 | ||
371 | for ( ; after && before && delta; | 464 | old->c1 = new->c1 - old->c1; |
372 | after = after->next, before = before->next, delta = delta->next) { | ||
373 | if (before->cpu != after->cpu) { | ||
374 | printf("cpu configuration changed: %d != %d\n", | ||
375 | before->cpu, after->cpu); | ||
376 | return -1; | ||
377 | } | ||
378 | 465 | ||
379 | if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { | 466 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
380 | fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", | 467 | old->aperf = new->aperf - old->aperf; |
381 | before->cpu, before->tsc, after->tsc); | 468 | old->mperf = new->mperf - old->mperf; |
382 | errors++; | 469 | } else { |
383 | } | ||
384 | /* check for TSC < 1 Mcycles over interval */ | ||
385 | if (delta->tsc < (1000 * 1000)) { | ||
386 | fprintf(stderr, "Insanely slow TSC rate," | ||
387 | " TSC stops in idle?\n"); | ||
388 | fprintf(stderr, "You can disable all c-states" | ||
389 | " by booting with \"idle=poll\"\n"); | ||
390 | fprintf(stderr, "or just the deep ones with" | ||
391 | " \"processor.max_cstate=1\"\n"); | ||
392 | exit(-3); | ||
393 | } | ||
394 | if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { | ||
395 | fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", | ||
396 | before->cpu, before->c3, after->c3); | ||
397 | errors++; | ||
398 | } | ||
399 | if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { | ||
400 | fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", | ||
401 | before->cpu, before->c6, after->c6); | ||
402 | errors++; | ||
403 | } | ||
404 | if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { | ||
405 | fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", | ||
406 | before->cpu, before->c7, after->c7); | ||
407 | errors++; | ||
408 | } | ||
409 | if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { | ||
410 | fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", | ||
411 | before->cpu, before->pc2, after->pc2); | ||
412 | errors++; | ||
413 | } | ||
414 | if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { | ||
415 | fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", | ||
416 | before->cpu, before->pc3, after->pc3); | ||
417 | errors++; | ||
418 | } | ||
419 | if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { | ||
420 | fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", | ||
421 | before->cpu, before->pc6, after->pc6); | ||
422 | errors++; | ||
423 | } | ||
424 | if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { | ||
425 | fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", | ||
426 | before->cpu, before->pc7, after->pc7); | ||
427 | errors++; | ||
428 | } | ||
429 | 470 | ||
430 | perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); | 471 | if (!aperf_mperf_unstable) { |
431 | if (perf_err) { | 472 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
432 | fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", | 473 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
433 | before->cpu, before->aperf, after->aperf); | 474 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
434 | } | ||
435 | perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); | ||
436 | if (perf_err) { | ||
437 | fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", | ||
438 | before->cpu, before->mperf, after->mperf); | ||
439 | } | ||
440 | if (perf_err) { | ||
441 | if (!aperf_mperf_unstable) { | ||
442 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | ||
443 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | ||
444 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | ||
445 | 475 | ||
446 | aperf_mperf_unstable = 1; | 476 | aperf_mperf_unstable = 1; |
447 | } | ||
448 | /* | ||
449 | * mperf delta is likely a huge "positive" number | ||
450 | * can not use it for calculating c0 time | ||
451 | */ | ||
452 | skip_c0 = 1; | ||
453 | skip_c1 = 1; | ||
454 | } | 477 | } |
455 | |||
456 | /* | 478 | /* |
457 | * As mperf and tsc collection are not atomic, | 479 | * mperf delta is likely a huge "positive" number |
458 | * it is possible for mperf's non-halted cycles | 480 | * can not use it for calculating c0 time |
459 | * to exceed TSC's all cycles: show c1 = 0% in that case. | ||
460 | */ | 481 | */ |
461 | if (delta->mperf > delta->tsc) | 482 | skip_c0 = 1; |
462 | delta->c1 = 0; | 483 | skip_c1 = 1; |
463 | else /* normal case, derive c1 */ | 484 | } |
464 | delta->c1 = delta->tsc - delta->mperf | ||
465 | - delta->c3 - delta->c6 - delta->c7; | ||
466 | 485 | ||
467 | if (delta->mperf == 0) | ||
468 | delta->mperf = 1; /* divide by 0 protection */ | ||
469 | 486 | ||
470 | /* | 487 | /* |
471 | * for "extra msr", just copy the latest w/o subtracting | 488 | * As counter collection is not atomic, |
472 | */ | 489 | * it is possible for mperf's non-halted cycles + idle states |
473 | delta->extra_msr = after->extra_msr; | 490 | * to exceed TSC's all cycles: show c1 = 0% in that case. |
474 | if (errors) { | 491 | */ |
475 | fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); | 492 | if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) |
476 | dump_cnt(before); | 493 | old->c1 = 0; |
477 | fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); | 494 | else { |
478 | dump_cnt(after); | 495 | /* normal case, derive c1 */ |
479 | errors = 0; | 496 | old->c1 = old->tsc - old->mperf - core_delta->c3 |
480 | } | 497 | - core_delta->c6 - core_delta->c7; |
481 | } | 498 | } |
499 | |||
500 | if (old->mperf == 0) { | ||
501 | if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | ||
502 | old->mperf = 1; /* divide by 0 protection */ | ||
503 | } | ||
504 | |||
505 | /* | ||
506 | * for "extra msr", just copy the latest w/o subtracting | ||
507 | */ | ||
508 | old->extra_msr = new->extra_msr; | ||
509 | } | ||
510 | |||
511 | int delta_cpu(struct thread_data *t, struct core_data *c, | ||
512 | struct pkg_data *p, struct thread_data *t2, | ||
513 | struct core_data *c2, struct pkg_data *p2) | ||
514 | { | ||
515 | /* calculate core delta only for 1st thread in core */ | ||
516 | if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) | ||
517 | delta_core(c, c2); | ||
518 | |||
519 | /* always calculate thread delta */ | ||
520 | delta_thread(t, t2, c2); /* c2 is core delta */ | ||
521 | |||
522 | /* calculate package delta only for 1st core in package */ | ||
523 | if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) | ||
524 | delta_package(p, p2); | ||
525 | |||
482 | return 0; | 526 | return 0; |
483 | } | 527 | } |
484 | 528 | ||
485 | void compute_average(struct counters *delta, struct counters *avg) | 529 | void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
530 | { | ||
531 | t->tsc = 0; | ||
532 | t->aperf = 0; | ||
533 | t->mperf = 0; | ||
534 | t->c1 = 0; | ||
535 | |||
536 | /* tells format_counters to dump all fields from this set */ | ||
537 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
538 | |||
539 | c->c3 = 0; | ||
540 | c->c6 = 0; | ||
541 | c->c7 = 0; | ||
542 | |||
543 | p->pc2 = 0; | ||
544 | p->pc3 = 0; | ||
545 | p->pc6 = 0; | ||
546 | p->pc7 = 0; | ||
547 | } | ||
548 | int sum_counters(struct thread_data *t, struct core_data *c, | ||
549 | struct pkg_data *p) | ||
486 | { | 550 | { |
487 | struct counters *sum; | 551 | average.threads.tsc += t->tsc; |
552 | average.threads.aperf += t->aperf; | ||
553 | average.threads.mperf += t->mperf; | ||
554 | average.threads.c1 += t->c1; | ||
488 | 555 | ||
489 | sum = calloc(1, sizeof(struct counters)); | 556 | /* sum per-core values only for 1st thread in core */ |
490 | if (sum == NULL) { | 557 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
491 | perror("calloc sum"); | 558 | return 0; |
492 | exit(1); | ||
493 | } | ||
494 | 559 | ||
495 | for (; delta; delta = delta->next) { | 560 | average.cores.c3 += c->c3; |
496 | sum->tsc += delta->tsc; | 561 | average.cores.c6 += c->c6; |
497 | sum->c1 += delta->c1; | 562 | average.cores.c7 += c->c7; |
498 | sum->c3 += delta->c3; | 563 | |
499 | sum->c6 += delta->c6; | 564 | /* sum per-pkg values only for 1st core in pkg */ |
500 | sum->c7 += delta->c7; | 565 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
501 | sum->aperf += delta->aperf; | 566 | return 0; |
502 | sum->mperf += delta->mperf; | 567 | |
503 | sum->pc2 += delta->pc2; | 568 | average.packages.pc2 += p->pc2; |
504 | sum->pc3 += delta->pc3; | 569 | average.packages.pc3 += p->pc3; |
505 | sum->pc6 += delta->pc6; | 570 | average.packages.pc6 += p->pc6; |
506 | sum->pc7 += delta->pc7; | 571 | average.packages.pc7 += p->pc7; |
507 | } | 572 | |
508 | avg->tsc = sum->tsc/num_cpus; | 573 | return 0; |
509 | avg->c1 = sum->c1/num_cpus; | 574 | } |
510 | avg->c3 = sum->c3/num_cpus; | 575 | /* |
511 | avg->c6 = sum->c6/num_cpus; | 576 | * sum the counters for all cpus in the system |
512 | avg->c7 = sum->c7/num_cpus; | 577 | * compute the weighted average |
513 | avg->aperf = sum->aperf/num_cpus; | 578 | */ |
514 | avg->mperf = sum->mperf/num_cpus; | 579 | void compute_average(struct thread_data *t, struct core_data *c, |
515 | avg->pc2 = sum->pc2/num_cpus; | 580 | struct pkg_data *p) |
516 | avg->pc3 = sum->pc3/num_cpus; | 581 | { |
517 | avg->pc6 = sum->pc6/num_cpus; | 582 | clear_counters(&average.threads, &average.cores, &average.packages); |
518 | avg->pc7 = sum->pc7/num_cpus; | 583 | |
519 | 584 | for_all_cpus(sum_counters, t, c, p); | |
520 | free(sum); | 585 | |
586 | average.threads.tsc /= topo.num_cpus; | ||
587 | average.threads.aperf /= topo.num_cpus; | ||
588 | average.threads.mperf /= topo.num_cpus; | ||
589 | average.threads.c1 /= topo.num_cpus; | ||
590 | |||
591 | average.cores.c3 /= topo.num_cores; | ||
592 | average.cores.c6 /= topo.num_cores; | ||
593 | average.cores.c7 /= topo.num_cores; | ||
594 | |||
595 | average.packages.pc2 /= topo.num_packages; | ||
596 | average.packages.pc3 /= topo.num_packages; | ||
597 | average.packages.pc6 /= topo.num_packages; | ||
598 | average.packages.pc7 /= topo.num_packages; | ||
521 | } | 599 | } |
522 | 600 | ||
523 | int get_counters(struct counters *cnt) | 601 | static unsigned long long rdtsc(void) |
524 | { | 602 | { |
525 | for ( ; cnt; cnt = cnt->next) { | 603 | unsigned int low, high; |
526 | 604 | ||
527 | if (cpu_migrate(cnt->cpu)) | 605 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); |
528 | return -1; | ||
529 | 606 | ||
530 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) | 607 | return low | ((unsigned long long)high) << 32; |
531 | return -1; | 608 | } |
532 | 609 | ||
533 | if (has_aperf) { | ||
534 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) | ||
535 | return -1; | ||
536 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) | ||
537 | return -1; | ||
538 | } | ||
539 | 610 | ||
540 | if (do_nhm_cstates) { | 611 | /* |
541 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) | 612 | * get_counters(...) |
542 | return -1; | 613 | * migrate to cpu |
543 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | 614 | * acquire and record local counters for that cpu |
544 | return -1; | 615 | */ |
545 | } | 616 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
617 | { | ||
618 | int cpu = t->cpu_id; | ||
546 | 619 | ||
547 | if (do_snb_cstates) | 620 | if (cpu_migrate(cpu)) |
548 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) | 621 | return -1; |
549 | return -1; | ||
550 | 622 | ||
551 | if (do_nhm_cstates) { | 623 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
552 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | 624 | |
553 | return -1; | 625 | if (has_aperf) { |
554 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | 626 | if (get_msr(cpu, MSR_APERF, &t->aperf)) |
555 | return -1; | 627 | return -3; |
556 | } | 628 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) |
557 | if (do_snb_cstates) { | 629 | return -4; |
558 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | 630 | } |
559 | return -1; | 631 | |
560 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | 632 | if (extra_msr_offset) |
561 | return -1; | 633 | if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) |
562 | } | 634 | return -5; |
563 | if (extra_msr_offset) | 635 | |
564 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) | 636 | /* collect core counters only for 1st thread in core */ |
565 | return -1; | 637 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
638 | return 0; | ||
639 | |||
640 | if (do_nhm_cstates) { | ||
641 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | ||
642 | return -6; | ||
643 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | ||
644 | return -7; | ||
645 | } | ||
646 | |||
647 | if (do_snb_cstates) | ||
648 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | ||
649 | return -8; | ||
650 | |||
651 | /* collect package counters only for 1st core in package */ | ||
652 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
653 | return 0; | ||
654 | |||
655 | if (do_nhm_cstates) { | ||
656 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) | ||
657 | return -9; | ||
658 | if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) | ||
659 | return -10; | ||
660 | } | ||
661 | if (do_snb_cstates) { | ||
662 | if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) | ||
663 | return -11; | ||
664 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | ||
665 | return -12; | ||
566 | } | 666 | } |
567 | return 0; | 667 | return 0; |
568 | } | 668 | } |
569 | 669 | ||
570 | void print_nehalem_info(void) | 670 | void print_verbose_header(void) |
571 | { | 671 | { |
572 | unsigned long long msr; | 672 | unsigned long long msr; |
573 | unsigned int ratio; | 673 | unsigned int ratio; |
@@ -615,143 +715,82 @@ void print_nehalem_info(void) | |||
615 | 715 | ||
616 | } | 716 | } |
617 | 717 | ||
618 | void free_counter_list(struct counters *list) | 718 | void free_all_buffers(void) |
619 | { | 719 | { |
620 | struct counters *p; | 720 | CPU_FREE(cpu_present_set); |
721 | cpu_present_set = NULL; | ||
722 | cpu_present_set = 0; | ||
621 | 723 | ||
622 | for (p = list; p; ) { | 724 | CPU_FREE(cpu_affinity_set); |
623 | struct counters *free_me; | 725 | cpu_affinity_set = NULL; |
726 | cpu_affinity_setsize = 0; | ||
624 | 727 | ||
625 | free_me = p; | 728 | free(thread_even); |
626 | p = p->next; | 729 | free(core_even); |
627 | free(free_me); | 730 | free(package_even); |
628 | } | ||
629 | } | ||
630 | 731 | ||
631 | void free_all_counters(void) | 732 | thread_even = NULL; |
632 | { | 733 | core_even = NULL; |
633 | free_counter_list(cnt_even); | 734 | package_even = NULL; |
634 | cnt_even = NULL; | ||
635 | 735 | ||
636 | free_counter_list(cnt_odd); | 736 | free(thread_odd); |
637 | cnt_odd = NULL; | 737 | free(core_odd); |
738 | free(package_odd); | ||
638 | 739 | ||
639 | free_counter_list(cnt_delta); | 740 | thread_odd = NULL; |
640 | cnt_delta = NULL; | 741 | core_odd = NULL; |
742 | package_odd = NULL; | ||
641 | 743 | ||
642 | free_counter_list(cnt_average); | 744 | free(output_buffer); |
643 | cnt_average = NULL; | 745 | output_buffer = NULL; |
746 | outp = NULL; | ||
644 | } | 747 | } |
645 | 748 | ||
646 | void insert_counters(struct counters **list, | 749 | /* |
647 | struct counters *new) | 750 | * cpu_is_first_sibling_in_core(cpu) |
751 | * return 1 if given CPU is 1st HT sibling in the core | ||
752 | */ | ||
753 | int cpu_is_first_sibling_in_core(int cpu) | ||
648 | { | 754 | { |
649 | struct counters *prev; | 755 | char path[64]; |
650 | 756 | FILE *filep; | |
651 | /* | 757 | int first_cpu; |
652 | * list was empty | ||
653 | */ | ||
654 | if (*list == NULL) { | ||
655 | new->next = *list; | ||
656 | *list = new; | ||
657 | return; | ||
658 | } | ||
659 | |||
660 | if (!summary_only) | ||
661 | show_cpu = 1; /* there is more than one CPU */ | ||
662 | |||
663 | /* | ||
664 | * insert on front of list. | ||
665 | * It is sorted by ascending package#, core#, cpu# | ||
666 | */ | ||
667 | if (((*list)->pkg > new->pkg) || | ||
668 | (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || | ||
669 | (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { | ||
670 | new->next = *list; | ||
671 | *list = new; | ||
672 | return; | ||
673 | } | ||
674 | |||
675 | prev = *list; | ||
676 | |||
677 | while (prev->next && (prev->next->pkg < new->pkg)) { | ||
678 | prev = prev->next; | ||
679 | if (!summary_only) | ||
680 | show_pkg = 1; /* there is more than 1 package */ | ||
681 | } | ||
682 | |||
683 | while (prev->next && (prev->next->pkg == new->pkg) | ||
684 | && (prev->next->core < new->core)) { | ||
685 | prev = prev->next; | ||
686 | if (!summary_only) | ||
687 | show_core = 1; /* there is more than 1 core */ | ||
688 | } | ||
689 | 758 | ||
690 | while (prev->next && (prev->next->pkg == new->pkg) | 759 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); |
691 | && (prev->next->core == new->core) | 760 | filep = fopen(path, "r"); |
692 | && (prev->next->cpu < new->cpu)) { | 761 | if (filep == NULL) { |
693 | prev = prev->next; | 762 | perror(path); |
763 | exit(1); | ||
694 | } | 764 | } |
695 | 765 | fscanf(filep, "%d", &first_cpu); | |
696 | /* | 766 | fclose(filep); |
697 | * insert after "prev" | 767 | return (cpu == first_cpu); |
698 | */ | ||
699 | new->next = prev->next; | ||
700 | prev->next = new; | ||
701 | } | 768 | } |
702 | 769 | ||
703 | void alloc_new_counters(int pkg, int core, int cpu) | 770 | /* |
771 | * cpu_is_first_core_in_package(cpu) | ||
772 | * return 1 if given CPU is 1st core in package | ||
773 | */ | ||
774 | int cpu_is_first_core_in_package(int cpu) | ||
704 | { | 775 | { |
705 | struct counters *new; | 776 | char path[64]; |
706 | 777 | FILE *filep; | |
707 | if (verbose > 1) | 778 | int first_cpu; |
708 | printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); | ||
709 | |||
710 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
711 | if (new == NULL) { | ||
712 | perror("calloc"); | ||
713 | exit(1); | ||
714 | } | ||
715 | new->pkg = pkg; | ||
716 | new->core = core; | ||
717 | new->cpu = cpu; | ||
718 | insert_counters(&cnt_odd, new); | ||
719 | |||
720 | new = (struct counters *)calloc(1, | ||
721 | sizeof(struct counters)); | ||
722 | if (new == NULL) { | ||
723 | perror("calloc"); | ||
724 | exit(1); | ||
725 | } | ||
726 | new->pkg = pkg; | ||
727 | new->core = core; | ||
728 | new->cpu = cpu; | ||
729 | insert_counters(&cnt_even, new); | ||
730 | |||
731 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
732 | if (new == NULL) { | ||
733 | perror("calloc"); | ||
734 | exit(1); | ||
735 | } | ||
736 | new->pkg = pkg; | ||
737 | new->core = core; | ||
738 | new->cpu = cpu; | ||
739 | insert_counters(&cnt_delta, new); | ||
740 | 779 | ||
741 | new = (struct counters *)calloc(1, sizeof(struct counters)); | 780 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); |
742 | if (new == NULL) { | 781 | filep = fopen(path, "r"); |
743 | perror("calloc"); | 782 | if (filep == NULL) { |
783 | perror(path); | ||
744 | exit(1); | 784 | exit(1); |
745 | } | 785 | } |
746 | new->pkg = pkg; | 786 | fscanf(filep, "%d", &first_cpu); |
747 | new->core = core; | 787 | fclose(filep); |
748 | new->cpu = cpu; | 788 | return (cpu == first_cpu); |
749 | cnt_average = new; | ||
750 | } | 789 | } |
751 | 790 | ||
752 | int get_physical_package_id(int cpu) | 791 | int get_physical_package_id(int cpu) |
753 | { | 792 | { |
754 | char path[64]; | 793 | char path[80]; |
755 | FILE *filep; | 794 | FILE *filep; |
756 | int pkg; | 795 | int pkg; |
757 | 796 | ||
@@ -768,7 +807,7 @@ int get_physical_package_id(int cpu) | |||
768 | 807 | ||
769 | int get_core_id(int cpu) | 808 | int get_core_id(int cpu) |
770 | { | 809 | { |
771 | char path[64]; | 810 | char path[80]; |
772 | FILE *filep; | 811 | FILE *filep; |
773 | int core; | 812 | int core; |
774 | 813 | ||
@@ -783,14 +822,87 @@ int get_core_id(int cpu) | |||
783 | return core; | 822 | return core; |
784 | } | 823 | } |
785 | 824 | ||
825 | int get_num_ht_siblings(int cpu) | ||
826 | { | ||
827 | char path[80]; | ||
828 | FILE *filep; | ||
829 | int sib1, sib2; | ||
830 | int matches; | ||
831 | char character; | ||
832 | |||
833 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | ||
834 | filep = fopen(path, "r"); | ||
835 | if (filep == NULL) { | ||
836 | perror(path); | ||
837 | exit(1); | ||
838 | } | ||
839 | /* | ||
840 | * file format: | ||
841 | * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) | ||
842 | * otherwinse 1 sibling (self). | ||
843 | */ | ||
844 | matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); | ||
845 | |||
846 | fclose(filep); | ||
847 | |||
848 | if (matches == 3) | ||
849 | return 2; | ||
850 | else | ||
851 | return 1; | ||
852 | } | ||
853 | |||
786 | /* | 854 | /* |
787 | * run func(pkg, core, cpu) on every cpu in /proc/stat | 855 | * run func(thread, core, package) in topology order |
856 | * skip non-present cpus | ||
788 | */ | 857 | */ |
789 | 858 | ||
790 | int for_all_cpus(void (func)(int, int, int)) | 859 | int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, |
860 | struct pkg_data *, struct thread_data *, struct core_data *, | ||
861 | struct pkg_data *), struct thread_data *thread_base, | ||
862 | struct core_data *core_base, struct pkg_data *pkg_base, | ||
863 | struct thread_data *thread_base2, struct core_data *core_base2, | ||
864 | struct pkg_data *pkg_base2) | ||
865 | { | ||
866 | int retval, pkg_no, core_no, thread_no; | ||
867 | |||
868 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | ||
869 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | ||
870 | for (thread_no = 0; thread_no < | ||
871 | topo.num_threads_per_core; ++thread_no) { | ||
872 | struct thread_data *t, *t2; | ||
873 | struct core_data *c, *c2; | ||
874 | struct pkg_data *p, *p2; | ||
875 | |||
876 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | ||
877 | |||
878 | if (cpu_is_not_present(t->cpu_id)) | ||
879 | continue; | ||
880 | |||
881 | t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); | ||
882 | |||
883 | c = GET_CORE(core_base, core_no, pkg_no); | ||
884 | c2 = GET_CORE(core_base2, core_no, pkg_no); | ||
885 | |||
886 | p = GET_PKG(pkg_base, pkg_no); | ||
887 | p2 = GET_PKG(pkg_base2, pkg_no); | ||
888 | |||
889 | retval = func(t, c, p, t2, c2, p2); | ||
890 | if (retval) | ||
891 | return retval; | ||
892 | } | ||
893 | } | ||
894 | } | ||
895 | return 0; | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * run func(cpu) on every cpu in /proc/stat | ||
900 | * return max_cpu number | ||
901 | */ | ||
902 | int for_all_proc_cpus(int (func)(int)) | ||
791 | { | 903 | { |
792 | FILE *fp; | 904 | FILE *fp; |
793 | int cpu_count; | 905 | int cpu_num; |
794 | int retval; | 906 | int retval; |
795 | 907 | ||
796 | fp = fopen(proc_stat, "r"); | 908 | fp = fopen(proc_stat, "r"); |
@@ -805,78 +917,88 @@ int for_all_cpus(void (func)(int, int, int)) | |||
805 | exit(1); | 917 | exit(1); |
806 | } | 918 | } |
807 | 919 | ||
808 | for (cpu_count = 0; ; cpu_count++) { | 920 | while (1) { |
809 | int cpu; | 921 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); |
810 | |||
811 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); | ||
812 | if (retval != 1) | 922 | if (retval != 1) |
813 | break; | 923 | break; |
814 | 924 | ||
815 | func(get_physical_package_id(cpu), get_core_id(cpu), cpu); | 925 | retval = func(cpu_num); |
926 | if (retval) { | ||
927 | fclose(fp); | ||
928 | return(retval); | ||
929 | } | ||
816 | } | 930 | } |
817 | fclose(fp); | 931 | fclose(fp); |
818 | return cpu_count; | 932 | return 0; |
819 | } | 933 | } |
820 | 934 | ||
821 | void re_initialize(void) | 935 | void re_initialize(void) |
822 | { | 936 | { |
823 | free_all_counters(); | 937 | free_all_buffers(); |
824 | num_cpus = for_all_cpus(alloc_new_counters); | 938 | setup_all_buffers(); |
825 | cpu_mask_uninit(); | 939 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); |
826 | cpu_mask_init(num_cpus); | ||
827 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); | ||
828 | } | 940 | } |
829 | 941 | ||
830 | void dummy(int pkg, int core, int cpu) { return; } | 942 | |
831 | /* | 943 | /* |
832 | * check to see if a cpu came on-line | 944 | * count_cpus() |
945 | * remember the last one seen, it will be the max | ||
833 | */ | 946 | */ |
834 | int verify_num_cpus(void) | 947 | int count_cpus(int cpu) |
835 | { | 948 | { |
836 | int new_num_cpus; | 949 | if (topo.max_cpu_num < cpu) |
837 | 950 | topo.max_cpu_num = cpu; | |
838 | new_num_cpus = for_all_cpus(dummy); | ||
839 | 951 | ||
840 | if (new_num_cpus != num_cpus) { | 952 | topo.num_cpus += 1; |
841 | if (verbose) | 953 | return 0; |
842 | printf("num_cpus was %d, is now %d\n", | 954 | } |
843 | num_cpus, new_num_cpus); | 955 | int mark_cpu_present(int cpu) |
844 | return -1; | 956 | { |
845 | } | 957 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); |
846 | return 0; | 958 | return 0; |
847 | } | 959 | } |
848 | 960 | ||
849 | void turbostat_loop() | 961 | void turbostat_loop() |
850 | { | 962 | { |
963 | int retval; | ||
964 | |||
851 | restart: | 965 | restart: |
852 | get_counters(cnt_even); | 966 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
967 | if (retval) { | ||
968 | re_initialize(); | ||
969 | goto restart; | ||
970 | } | ||
853 | gettimeofday(&tv_even, (struct timezone *)NULL); | 971 | gettimeofday(&tv_even, (struct timezone *)NULL); |
854 | 972 | ||
855 | while (1) { | 973 | while (1) { |
856 | if (verify_num_cpus()) { | 974 | if (for_all_proc_cpus(cpu_is_not_present)) { |
857 | re_initialize(); | 975 | re_initialize(); |
858 | goto restart; | 976 | goto restart; |
859 | } | 977 | } |
860 | sleep(interval_sec); | 978 | sleep(interval_sec); |
861 | if (get_counters(cnt_odd)) { | 979 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
980 | if (retval) { | ||
862 | re_initialize(); | 981 | re_initialize(); |
863 | goto restart; | 982 | goto restart; |
864 | } | 983 | } |
865 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 984 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
866 | compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
867 | timersub(&tv_odd, &tv_even, &tv_delta); | 985 | timersub(&tv_odd, &tv_even, &tv_delta); |
868 | compute_average(cnt_delta, cnt_average); | 986 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
869 | print_counters(cnt_delta); | 987 | compute_average(EVEN_COUNTERS); |
988 | format_all_counters(EVEN_COUNTERS); | ||
989 | flush_stdout(); | ||
870 | sleep(interval_sec); | 990 | sleep(interval_sec); |
871 | if (get_counters(cnt_even)) { | 991 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
992 | if (retval) { | ||
872 | re_initialize(); | 993 | re_initialize(); |
873 | goto restart; | 994 | goto restart; |
874 | } | 995 | } |
875 | gettimeofday(&tv_even, (struct timezone *)NULL); | 996 | gettimeofday(&tv_even, (struct timezone *)NULL); |
876 | compute_delta(cnt_even, cnt_odd, cnt_delta); | ||
877 | timersub(&tv_even, &tv_odd, &tv_delta); | 997 | timersub(&tv_even, &tv_odd, &tv_delta); |
878 | compute_average(cnt_delta, cnt_average); | 998 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
879 | print_counters(cnt_delta); | 999 | compute_average(ODD_COUNTERS); |
1000 | format_all_counters(ODD_COUNTERS); | ||
1001 | flush_stdout(); | ||
880 | } | 1002 | } |
881 | } | 1003 | } |
882 | 1004 | ||
@@ -1051,6 +1173,208 @@ int open_dev_cpu_msr(int dummy1) | |||
1051 | return 0; | 1173 | return 0; |
1052 | } | 1174 | } |
1053 | 1175 | ||
1176 | void topology_probe() | ||
1177 | { | ||
1178 | int i; | ||
1179 | int max_core_id = 0; | ||
1180 | int max_package_id = 0; | ||
1181 | int max_siblings = 0; | ||
1182 | struct cpu_topology { | ||
1183 | int core_id; | ||
1184 | int physical_package_id; | ||
1185 | } *cpus; | ||
1186 | |||
1187 | /* Initialize num_cpus, max_cpu_num */ | ||
1188 | topo.num_cpus = 0; | ||
1189 | topo.max_cpu_num = 0; | ||
1190 | for_all_proc_cpus(count_cpus); | ||
1191 | if (!summary_only && topo.num_cpus > 1) | ||
1192 | show_cpu = 1; | ||
1193 | |||
1194 | if (verbose > 1) | ||
1195 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | ||
1196 | |||
1197 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | ||
1198 | if (cpus == NULL) { | ||
1199 | perror("calloc cpus"); | ||
1200 | exit(1); | ||
1201 | } | ||
1202 | |||
1203 | /* | ||
1204 | * Allocate and initialize cpu_present_set | ||
1205 | */ | ||
1206 | cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1207 | if (cpu_present_set == NULL) { | ||
1208 | perror("CPU_ALLOC"); | ||
1209 | exit(3); | ||
1210 | } | ||
1211 | cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1212 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
1213 | for_all_proc_cpus(mark_cpu_present); | ||
1214 | |||
1215 | /* | ||
1216 | * Allocate and initialize cpu_affinity_set | ||
1217 | */ | ||
1218 | cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
1219 | if (cpu_affinity_set == NULL) { | ||
1220 | perror("CPU_ALLOC"); | ||
1221 | exit(3); | ||
1222 | } | ||
1223 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
1224 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); | ||
1225 | |||
1226 | |||
1227 | /* | ||
1228 | * For online cpus | ||
1229 | * find max_core_id, max_package_id | ||
1230 | */ | ||
1231 | for (i = 0; i <= topo.max_cpu_num; ++i) { | ||
1232 | int siblings; | ||
1233 | |||
1234 | if (cpu_is_not_present(i)) { | ||
1235 | if (verbose > 1) | ||
1236 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | ||
1237 | continue; | ||
1238 | } | ||
1239 | cpus[i].core_id = get_core_id(i); | ||
1240 | if (cpus[i].core_id > max_core_id) | ||
1241 | max_core_id = cpus[i].core_id; | ||
1242 | |||
1243 | cpus[i].physical_package_id = get_physical_package_id(i); | ||
1244 | if (cpus[i].physical_package_id > max_package_id) | ||
1245 | max_package_id = cpus[i].physical_package_id; | ||
1246 | |||
1247 | siblings = get_num_ht_siblings(i); | ||
1248 | if (siblings > max_siblings) | ||
1249 | max_siblings = siblings; | ||
1250 | if (verbose > 1) | ||
1251 | fprintf(stderr, "cpu %d pkg %d core %d\n", | ||
1252 | i, cpus[i].physical_package_id, cpus[i].core_id); | ||
1253 | } | ||
1254 | topo.num_cores_per_pkg = max_core_id + 1; | ||
1255 | if (verbose > 1) | ||
1256 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | ||
1257 | max_core_id, topo.num_cores_per_pkg); | ||
1258 | if (!summary_only && topo.num_cores_per_pkg > 1) | ||
1259 | show_core = 1; | ||
1260 | |||
1261 | topo.num_packages = max_package_id + 1; | ||
1262 | if (verbose > 1) | ||
1263 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | ||
1264 | max_package_id, topo.num_packages); | ||
1265 | if (!summary_only && topo.num_packages > 1) | ||
1266 | show_pkg = 1; | ||
1267 | |||
1268 | topo.num_threads_per_core = max_siblings; | ||
1269 | if (verbose > 1) | ||
1270 | fprintf(stderr, "max_siblings %d\n", max_siblings); | ||
1271 | |||
1272 | free(cpus); | ||
1273 | } | ||
1274 | |||
1275 | void | ||
1276 | allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) | ||
1277 | { | ||
1278 | int i; | ||
1279 | |||
1280 | *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * | ||
1281 | topo.num_packages, sizeof(struct thread_data)); | ||
1282 | if (*t == NULL) | ||
1283 | goto error; | ||
1284 | |||
1285 | for (i = 0; i < topo.num_threads_per_core * | ||
1286 | topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1287 | (*t)[i].cpu_id = -1; | ||
1288 | |||
1289 | *c = calloc(topo.num_cores_per_pkg * topo.num_packages, | ||
1290 | sizeof(struct core_data)); | ||
1291 | if (*c == NULL) | ||
1292 | goto error; | ||
1293 | |||
1294 | for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) | ||
1295 | (*c)[i].core_id = -1; | ||
1296 | |||
1297 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); | ||
1298 | if (*p == NULL) | ||
1299 | goto error; | ||
1300 | |||
1301 | for (i = 0; i < topo.num_packages; i++) | ||
1302 | (*p)[i].package_id = i; | ||
1303 | |||
1304 | return; | ||
1305 | error: | ||
1306 | perror("calloc counters"); | ||
1307 | exit(1); | ||
1308 | } | ||
1309 | /* | ||
1310 | * init_counter() | ||
1311 | * | ||
1312 | * set cpu_id, core_num, pkg_num | ||
1313 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE | ||
1314 | * | ||
1315 | * increment topo.num_cores when 1st core in pkg seen | ||
1316 | */ | ||
1317 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, | ||
1318 | struct pkg_data *pkg_base, int thread_num, int core_num, | ||
1319 | int pkg_num, int cpu_id) | ||
1320 | { | ||
1321 | struct thread_data *t; | ||
1322 | struct core_data *c; | ||
1323 | struct pkg_data *p; | ||
1324 | |||
1325 | t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); | ||
1326 | c = GET_CORE(core_base, core_num, pkg_num); | ||
1327 | p = GET_PKG(pkg_base, pkg_num); | ||
1328 | |||
1329 | t->cpu_id = cpu_id; | ||
1330 | if (thread_num == 0) { | ||
1331 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; | ||
1332 | if (cpu_is_first_core_in_package(cpu_id)) | ||
1333 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
1334 | } | ||
1335 | |||
1336 | c->core_id = core_num; | ||
1337 | p->package_id = pkg_num; | ||
1338 | } | ||
1339 | |||
1340 | |||
1341 | int initialize_counters(int cpu_id) | ||
1342 | { | ||
1343 | int my_thread_id, my_core_id, my_package_id; | ||
1344 | |||
1345 | my_package_id = get_physical_package_id(cpu_id); | ||
1346 | my_core_id = get_core_id(cpu_id); | ||
1347 | |||
1348 | if (cpu_is_first_sibling_in_core(cpu_id)) { | ||
1349 | my_thread_id = 0; | ||
1350 | topo.num_cores++; | ||
1351 | } else { | ||
1352 | my_thread_id = 1; | ||
1353 | } | ||
1354 | |||
1355 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1356 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
1357 | return 0; | ||
1358 | } | ||
1359 | |||
1360 | void allocate_output_buffer() | ||
1361 | { | ||
1362 | output_buffer = calloc(1, (1 + topo.num_cpus) * 128); | ||
1363 | outp = output_buffer; | ||
1364 | if (outp == NULL) { | ||
1365 | perror("calloc"); | ||
1366 | exit(-1); | ||
1367 | } | ||
1368 | } | ||
1369 | |||
1370 | void setup_all_buffers(void) | ||
1371 | { | ||
1372 | topology_probe(); | ||
1373 | allocate_counters(&thread_even, &core_even, &package_even); | ||
1374 | allocate_counters(&thread_odd, &core_odd, &package_odd); | ||
1375 | allocate_output_buffer(); | ||
1376 | for_all_proc_cpus(initialize_counters); | ||
1377 | } | ||
1054 | void turbostat_init() | 1378 | void turbostat_init() |
1055 | { | 1379 | { |
1056 | check_cpuid(); | 1380 | check_cpuid(); |
@@ -1058,21 +1382,19 @@ void turbostat_init() | |||
1058 | check_dev_msr(); | 1382 | check_dev_msr(); |
1059 | check_super_user(); | 1383 | check_super_user(); |
1060 | 1384 | ||
1061 | num_cpus = for_all_cpus(alloc_new_counters); | 1385 | setup_all_buffers(); |
1062 | cpu_mask_init(num_cpus); | ||
1063 | 1386 | ||
1064 | if (verbose) | 1387 | if (verbose) |
1065 | print_nehalem_info(); | 1388 | print_verbose_header(); |
1066 | } | 1389 | } |
1067 | 1390 | ||
1068 | int fork_it(char **argv) | 1391 | int fork_it(char **argv) |
1069 | { | 1392 | { |
1070 | int retval; | ||
1071 | pid_t child_pid; | 1393 | pid_t child_pid; |
1072 | get_counters(cnt_even); | ||
1073 | 1394 | ||
1074 | /* clear affinity side-effect of get_counters() */ | 1395 | for_all_cpus(get_counters, EVEN_COUNTERS); |
1075 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | 1396 | /* clear affinity side-effect of get_counters() */ |
1397 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | ||
1076 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1398 | gettimeofday(&tv_even, (struct timezone *)NULL); |
1077 | 1399 | ||
1078 | child_pid = fork(); | 1400 | child_pid = fork(); |
@@ -1095,14 +1417,17 @@ int fork_it(char **argv) | |||
1095 | exit(1); | 1417 | exit(1); |
1096 | } | 1418 | } |
1097 | } | 1419 | } |
1098 | get_counters(cnt_odd); | 1420 | /* |
1421 | * n.b. fork_it() does not check for errors from for_all_cpus() | ||
1422 | * because re-starting is problematic when forking | ||
1423 | */ | ||
1424 | for_all_cpus(get_counters, ODD_COUNTERS); | ||
1099 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 1425 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
1100 | retval = compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
1101 | |||
1102 | timersub(&tv_odd, &tv_even, &tv_delta); | 1426 | timersub(&tv_odd, &tv_even, &tv_delta); |
1103 | compute_average(cnt_delta, cnt_average); | 1427 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
1104 | if (!retval) | 1428 | compute_average(EVEN_COUNTERS); |
1105 | print_counters(cnt_delta); | 1429 | format_all_counters(EVEN_COUNTERS); |
1430 | flush_stderr(); | ||
1106 | 1431 | ||
1107 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 1432 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
1108 | 1433 | ||
@@ -1115,8 +1440,14 @@ void cmdline(int argc, char **argv) | |||
1115 | 1440 | ||
1116 | progname = argv[0]; | 1441 | progname = argv[0]; |
1117 | 1442 | ||
1118 | while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { | 1443 | while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { |
1119 | switch (opt) { | 1444 | switch (opt) { |
1445 | case 'c': | ||
1446 | show_core_only++; | ||
1447 | break; | ||
1448 | case 'p': | ||
1449 | show_pkg_only++; | ||
1450 | break; | ||
1120 | case 's': | 1451 | case 's': |
1121 | summary_only++; | 1452 | summary_only++; |
1122 | break; | 1453 | break; |
@@ -1142,10 +1473,8 @@ int main(int argc, char **argv) | |||
1142 | cmdline(argc, argv); | 1473 | cmdline(argc, argv); |
1143 | 1474 | ||
1144 | if (verbose > 1) | 1475 | if (verbose > 1) |
1145 | fprintf(stderr, "turbostat Dec 6, 2010" | 1476 | fprintf(stderr, "turbostat v2.0 May 16, 2012" |
1146 | " - Len Brown <lenb@kernel.org>\n"); | 1477 | " - Len Brown <lenb@kernel.org>\n"); |
1147 | if (verbose > 1) | ||
1148 | fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); | ||
1149 | 1478 | ||
1150 | turbostat_init(); | 1479 | turbostat_init(); |
1151 | 1480 | ||