diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2018-06-03 04:11:50 -0400 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2018-06-03 04:11:50 -0400 |
commit | ba8042a85e8aebc8043deee2d396459f2c39a66d (patch) | |
tree | d49d006d853267469de3fda52d4f031afb476061 /tools | |
parent | b04e217704b7f879c6b91222b066983a44a7a09f (diff) | |
parent | 201d4f50fef3c10856022b21cfd9fd81358a62ef (diff) |
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat utility updates for v4.18 from Len Brown.
* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (65 commits)
tools/power turbostat: update version number
tools/power turbostat: Add Node in output
tools/power turbostat: add node information into turbostat calculations
tools/power turbostat: remove num_ from cpu_topology struct
tools/power turbostat: rename num_cores_per_pkg to num_cores_per_node
tools/power turbostat: track thread ID in cpu_topology
tools/power turbostat: Calculate additional node information for a package
tools/power turbostat: Fix node and siblings lookup data
tools/power turbostat: set max_num_cpus equal to the cpumask length
tools/power turbostat: if --num_iterations, print for specific number of iterations
tools/power turbostat: Add Cannon Lake support
tools/power turbostat: delete duplicate #defines
x86: msr-index.h: Correct SNB_C1/C3_AUTO_UNDEMOTE defines
tools/power turbostat: Correct SNB_C1/C3_AUTO_UNDEMOTE defines
tools/power turbostat: add POLL and POLL% column
tools/power turbostat: Fix --hide Pk%pc10
tools/power turbostat: Build-in "Low Power Idle" counters support
tools/power turbostat: Don't make man pages executable
tools/power turbostat: remove blank lines
tools/power turbostat: a small C-states dump readability improvement
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 2 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 26 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 908 | ||||
-rw-r--r-- | tools/power/x86/x86_energy_perf_policy/Makefile | 2 |
4 files changed, 671 insertions, 267 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index a9bc914a8fe8..2ab25aa38263 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -25,4 +25,4 @@ install : turbostat | |||
25 | install -d $(DESTDIR)$(PREFIX)/bin | 25 | install -d $(DESTDIR)$(PREFIX)/bin |
26 | install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat | 26 | install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat |
27 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 | 27 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 |
28 | install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 | 28 | install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ccf2a69365cc..ca9ef7017624 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -54,9 +54,12 @@ name as necessary to disambiguate it from others is necessary. Note that option | |||
54 | .PP | 54 | .PP |
55 | \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 | 55 | \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 |
56 | .PP | 56 | .PP |
57 | \fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. | 57 | \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. |
58 | .PP | 58 | .PP |
59 | \fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. | 59 | \fB--enable column\fP show the specified built-in columns, which are otherwise disabled by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". |
60 | The column name "all" can be used to enable all disabled-by-default built-in counters. | ||
61 | .PP | ||
62 | \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. | ||
60 | .PP | 63 | .PP |
61 | \fB--Dump\fP displays the raw counter values. | 64 | \fB--Dump\fP displays the raw counter values. |
62 | .PP | 65 | .PP |
@@ -64,6 +67,8 @@ name as necessary to disambiguate it from others is necessary. Note that option | |||
64 | .PP | 67 | .PP |
65 | \fB--interval seconds\fP overrides the default 5.0 second measurement interval. | 68 | \fB--interval seconds\fP overrides the default 5.0 second measurement interval. |
66 | .PP | 69 | .PP |
70 | \fB--num_iterations num\fP number of the measurement iterations. | ||
71 | .PP | ||
67 | \fB--out output_file\fP turbostat output is written to the specified output_file. | 72 | \fB--out output_file\fP turbostat output is written to the specified output_file. |
68 | The file is truncated if it already exists, and it is created if it does not exist. | 73 | The file is truncated if it already exists, and it is created if it does not exist. |
69 | .PP | 74 | .PP |
@@ -86,6 +91,8 @@ displays the statistics gathered since it was forked. | |||
86 | The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. | 91 | The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. |
87 | .SH COLUMN DESCRIPTIONS | 92 | .SH COLUMN DESCRIPTIONS |
88 | .nf | 93 | .nf |
94 | \fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. | ||
95 | \fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. | ||
89 | \fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). | 96 | \fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). |
90 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. | 97 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. |
91 | \fBPackage\fP processor package number -- not present on systems with a single processor package. | 98 | \fBPackage\fP processor package number -- not present on systems with a single processor package. |
@@ -262,6 +269,21 @@ CPU PRF_CTRL | |||
262 | 269 | ||
263 | .fi | 270 | .fi |
264 | 271 | ||
272 | .SH INPUT | ||
273 | |||
274 | For interval-mode, turbostat will immediately end the current interval | ||
275 | when it sees a newline on standard input. | ||
276 | turbostat will then start the next interval. | ||
277 | Control-C will send a SIGINT to turbostat, | ||
278 | which will immediately abort the program with no further processing. | ||
279 | .SH SIGNALS | ||
280 | |||
281 | SIGINT will interrupt interval-mode. | ||
282 | The end-of-interval data will be collected and displayed before turbostat exits. | ||
283 | |||
284 | SIGUSR1 will end current interval, | ||
285 | end-of-interval data will be collected and displayed before turbostat | ||
286 | starts a new interval. | ||
265 | .SH NOTES | 287 | .SH NOTES |
266 | 288 | ||
267 | .B "turbostat " | 289 | .B "turbostat " |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bd9c6b31a504..d6cff3070ebd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <sys/types.h> | 29 | #include <sys/types.h> |
30 | #include <sys/wait.h> | 30 | #include <sys/wait.h> |
31 | #include <sys/stat.h> | 31 | #include <sys/stat.h> |
32 | #include <sys/select.h> | ||
32 | #include <sys/resource.h> | 33 | #include <sys/resource.h> |
33 | #include <fcntl.h> | 34 | #include <fcntl.h> |
34 | #include <signal.h> | 35 | #include <signal.h> |
@@ -47,9 +48,13 @@ | |||
47 | char *proc_stat = "/proc/stat"; | 48 | char *proc_stat = "/proc/stat"; |
48 | FILE *outf; | 49 | FILE *outf; |
49 | int *fd_percpu; | 50 | int *fd_percpu; |
51 | struct timeval interval_tv = {5, 0}; | ||
50 | struct timespec interval_ts = {5, 0}; | 52 | struct timespec interval_ts = {5, 0}; |
53 | struct timespec one_msec = {0, 1000000}; | ||
54 | unsigned int num_iterations; | ||
51 | unsigned int debug; | 55 | unsigned int debug; |
52 | unsigned int quiet; | 56 | unsigned int quiet; |
57 | unsigned int shown; | ||
53 | unsigned int sums_need_wide_columns; | 58 | unsigned int sums_need_wide_columns; |
54 | unsigned int rapl_joules; | 59 | unsigned int rapl_joules; |
55 | unsigned int summary_only; | 60 | unsigned int summary_only; |
@@ -58,6 +63,7 @@ unsigned int dump_only; | |||
58 | unsigned int do_snb_cstates; | 63 | unsigned int do_snb_cstates; |
59 | unsigned int do_knl_cstates; | 64 | unsigned int do_knl_cstates; |
60 | unsigned int do_slm_cstates; | 65 | unsigned int do_slm_cstates; |
66 | unsigned int do_cnl_cstates; | ||
61 | unsigned int use_c1_residency_msr; | 67 | unsigned int use_c1_residency_msr; |
62 | unsigned int has_aperf; | 68 | unsigned int has_aperf; |
63 | unsigned int has_epb; | 69 | unsigned int has_epb; |
@@ -80,6 +86,8 @@ unsigned int do_rapl; | |||
80 | unsigned int do_dts; | 86 | unsigned int do_dts; |
81 | unsigned int do_ptm; | 87 | unsigned int do_ptm; |
82 | unsigned long long gfx_cur_rc6_ms; | 88 | unsigned long long gfx_cur_rc6_ms; |
89 | unsigned long long cpuidle_cur_cpu_lpi_us; | ||
90 | unsigned long long cpuidle_cur_sys_lpi_us; | ||
83 | unsigned int gfx_cur_mhz; | 91 | unsigned int gfx_cur_mhz; |
84 | unsigned int tcc_activation_temp; | 92 | unsigned int tcc_activation_temp; |
85 | unsigned int tcc_activation_temp_override; | 93 | unsigned int tcc_activation_temp_override; |
@@ -87,6 +95,7 @@ double rapl_power_units, rapl_time_units; | |||
87 | double rapl_dram_energy_units, rapl_energy_units; | 95 | double rapl_dram_energy_units, rapl_energy_units; |
88 | double rapl_joule_counter_range; | 96 | double rapl_joule_counter_range; |
89 | unsigned int do_core_perf_limit_reasons; | 97 | unsigned int do_core_perf_limit_reasons; |
98 | unsigned int has_automatic_cstate_conversion; | ||
90 | unsigned int do_gfx_perf_limit_reasons; | 99 | unsigned int do_gfx_perf_limit_reasons; |
91 | unsigned int do_ring_perf_limit_reasons; | 100 | unsigned int do_ring_perf_limit_reasons; |
92 | unsigned int crystal_hz; | 101 | unsigned int crystal_hz; |
@@ -147,7 +156,9 @@ char *progname; | |||
147 | #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ | 156 | #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ |
148 | cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; | 157 | cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; |
149 | size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; | 158 | size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; |
150 | #define MAX_ADDED_COUNTERS 16 | 159 | #define MAX_ADDED_COUNTERS 8 |
160 | #define MAX_ADDED_THREAD_COUNTERS 24 | ||
161 | #define BITMASK_SIZE 32 | ||
151 | 162 | ||
152 | struct thread_data { | 163 | struct thread_data { |
153 | struct timeval tv_begin; | 164 | struct timeval tv_begin; |
@@ -162,7 +173,7 @@ struct thread_data { | |||
162 | unsigned int flags; | 173 | unsigned int flags; |
163 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 | 174 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 |
164 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 | 175 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 |
165 | unsigned long long counter[MAX_ADDED_COUNTERS]; | 176 | unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; |
166 | } *thread_even, *thread_odd; | 177 | } *thread_even, *thread_odd; |
167 | 178 | ||
168 | struct core_data { | 179 | struct core_data { |
@@ -183,6 +194,8 @@ struct pkg_data { | |||
183 | unsigned long long pc8; | 194 | unsigned long long pc8; |
184 | unsigned long long pc9; | 195 | unsigned long long pc9; |
185 | unsigned long long pc10; | 196 | unsigned long long pc10; |
197 | unsigned long long cpu_lpi; | ||
198 | unsigned long long sys_lpi; | ||
186 | unsigned long long pkg_wtd_core_c0; | 199 | unsigned long long pkg_wtd_core_c0; |
187 | unsigned long long pkg_any_core_c0; | 200 | unsigned long long pkg_any_core_c0; |
188 | unsigned long long pkg_any_gfxe_c0; | 201 | unsigned long long pkg_any_gfxe_c0; |
@@ -203,12 +216,21 @@ struct pkg_data { | |||
203 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | 216 | #define ODD_COUNTERS thread_odd, core_odd, package_odd |
204 | #define EVEN_COUNTERS thread_even, core_even, package_even | 217 | #define EVEN_COUNTERS thread_even, core_even, package_even |
205 | 218 | ||
206 | #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ | 219 | #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ |
207 | (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ | 220 | ((thread_base) + \ |
208 | topo.num_threads_per_core + \ | 221 | ((pkg_no) * \ |
209 | (core_no) * topo.num_threads_per_core + (thread_no)) | 222 | topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ |
210 | #define GET_CORE(core_base, core_no, pkg_no) \ | 223 | ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ |
211 | (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) | 224 | ((core_no) * topo.threads_per_core) + \ |
225 | (thread_no)) | ||
226 | |||
227 | #define GET_CORE(core_base, core_no, node_no, pkg_no) \ | ||
228 | ((core_base) + \ | ||
229 | ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ | ||
230 | ((node_no) * topo.cores_per_node) + \ | ||
231 | (core_no)) | ||
232 | |||
233 | |||
212 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) | 234 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) |
213 | 235 | ||
214 | enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; | 236 | enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; |
@@ -244,14 +266,25 @@ struct system_summary { | |||
244 | struct pkg_data packages; | 266 | struct pkg_data packages; |
245 | } average; | 267 | } average; |
246 | 268 | ||
269 | struct cpu_topology { | ||
270 | int physical_package_id; | ||
271 | int logical_cpu_id; | ||
272 | int physical_node_id; | ||
273 | int logical_node_id; /* 0-based count within the package */ | ||
274 | int physical_core_id; | ||
275 | int thread_id; | ||
276 | cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ | ||
277 | } *cpus; | ||
247 | 278 | ||
248 | struct topo_params { | 279 | struct topo_params { |
249 | int num_packages; | 280 | int num_packages; |
250 | int num_cpus; | 281 | int num_cpus; |
251 | int num_cores; | 282 | int num_cores; |
252 | int max_cpu_num; | 283 | int max_cpu_num; |
253 | int num_cores_per_pkg; | 284 | int max_node_num; |
254 | int num_threads_per_core; | 285 | int nodes_per_pkg; |
286 | int cores_per_node; | ||
287 | int threads_per_core; | ||
255 | } topo; | 288 | } topo; |
256 | 289 | ||
257 | struct timeval tv_even, tv_odd, tv_delta; | 290 | struct timeval tv_even, tv_odd, tv_delta; |
@@ -273,27 +306,33 @@ int cpu_is_not_present(int cpu) | |||
273 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), | 306 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), |
274 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) | 307 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) |
275 | { | 308 | { |
276 | int retval, pkg_no, core_no, thread_no; | 309 | int retval, pkg_no, core_no, thread_no, node_no; |
277 | 310 | ||
278 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | 311 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
279 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | 312 | for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { |
280 | for (thread_no = 0; thread_no < | 313 | for (node_no = 0; node_no < topo.nodes_per_pkg; |
281 | topo.num_threads_per_core; ++thread_no) { | 314 | node_no++) { |
282 | struct thread_data *t; | 315 | for (thread_no = 0; thread_no < |
283 | struct core_data *c; | 316 | topo.threads_per_core; ++thread_no) { |
284 | struct pkg_data *p; | 317 | struct thread_data *t; |
285 | 318 | struct core_data *c; | |
286 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | 319 | struct pkg_data *p; |
287 | 320 | ||
288 | if (cpu_is_not_present(t->cpu_id)) | 321 | t = GET_THREAD(thread_base, thread_no, |
289 | continue; | 322 | core_no, node_no, |
290 | 323 | pkg_no); | |
291 | c = GET_CORE(core_base, core_no, pkg_no); | 324 | |
292 | p = GET_PKG(pkg_base, pkg_no); | 325 | if (cpu_is_not_present(t->cpu_id)) |
293 | 326 | continue; | |
294 | retval = func(t, c, p); | 327 | |
295 | if (retval) | 328 | c = GET_CORE(core_base, core_no, |
296 | return retval; | 329 | node_no, pkg_no); |
330 | p = GET_PKG(pkg_base, pkg_no); | ||
331 | |||
332 | retval = func(t, c, p); | ||
333 | if (retval) | ||
334 | return retval; | ||
335 | } | ||
297 | } | 336 | } |
298 | } | 337 | } |
299 | } | 338 | } |
@@ -346,6 +385,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
346 | * Thus, strings that are proper sub-sets must follow their more specific peers. | 385 | * Thus, strings that are proper sub-sets must follow their more specific peers. |
347 | */ | 386 | */ |
348 | struct msr_counter bic[] = { | 387 | struct msr_counter bic[] = { |
388 | { 0x0, "usec" }, | ||
389 | { 0x0, "Time_Of_Day_Seconds" }, | ||
349 | { 0x0, "Package" }, | 390 | { 0x0, "Package" }, |
350 | { 0x0, "Avg_MHz" }, | 391 | { 0x0, "Avg_MHz" }, |
351 | { 0x0, "Bzy_MHz" }, | 392 | { 0x0, "Bzy_MHz" }, |
@@ -369,7 +410,9 @@ struct msr_counter bic[] = { | |||
369 | { 0x0, "Pkg%pc7" }, | 410 | { 0x0, "Pkg%pc7" }, |
370 | { 0x0, "Pkg%pc8" }, | 411 | { 0x0, "Pkg%pc8" }, |
371 | { 0x0, "Pkg%pc9" }, | 412 | { 0x0, "Pkg%pc9" }, |
372 | { 0x0, "Pkg%pc10" }, | 413 | { 0x0, "Pk%pc10" }, |
414 | { 0x0, "CPU%LPI" }, | ||
415 | { 0x0, "SYS%LPI" }, | ||
373 | { 0x0, "PkgWatt" }, | 416 | { 0x0, "PkgWatt" }, |
374 | { 0x0, "CorWatt" }, | 417 | { 0x0, "CorWatt" }, |
375 | { 0x0, "GFXWatt" }, | 418 | { 0x0, "GFXWatt" }, |
@@ -389,62 +432,72 @@ struct msr_counter bic[] = { | |||
389 | { 0x0, "Any%C0" }, | 432 | { 0x0, "Any%C0" }, |
390 | { 0x0, "GFX%C0" }, | 433 | { 0x0, "GFX%C0" }, |
391 | { 0x0, "CPUGFX%" }, | 434 | { 0x0, "CPUGFX%" }, |
435 | { 0x0, "Node%" }, | ||
392 | }; | 436 | }; |
393 | 437 | ||
394 | 438 | ||
395 | 439 | ||
396 | #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) | 440 | #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) |
397 | #define BIC_Package (1ULL << 0) | 441 | #define BIC_USEC (1ULL << 0) |
398 | #define BIC_Avg_MHz (1ULL << 1) | 442 | #define BIC_TOD (1ULL << 1) |
399 | #define BIC_Bzy_MHz (1ULL << 2) | 443 | #define BIC_Package (1ULL << 2) |
400 | #define BIC_TSC_MHz (1ULL << 3) | 444 | #define BIC_Avg_MHz (1ULL << 3) |
401 | #define BIC_IRQ (1ULL << 4) | 445 | #define BIC_Bzy_MHz (1ULL << 4) |
402 | #define BIC_SMI (1ULL << 5) | 446 | #define BIC_TSC_MHz (1ULL << 5) |
403 | #define BIC_Busy (1ULL << 6) | 447 | #define BIC_IRQ (1ULL << 6) |
404 | #define BIC_CPU_c1 (1ULL << 7) | 448 | #define BIC_SMI (1ULL << 7) |
405 | #define BIC_CPU_c3 (1ULL << 8) | 449 | #define BIC_Busy (1ULL << 8) |
406 | #define BIC_CPU_c6 (1ULL << 9) | 450 | #define BIC_CPU_c1 (1ULL << 9) |
407 | #define BIC_CPU_c7 (1ULL << 10) | 451 | #define BIC_CPU_c3 (1ULL << 10) |
408 | #define BIC_ThreadC (1ULL << 11) | 452 | #define BIC_CPU_c6 (1ULL << 11) |
409 | #define BIC_CoreTmp (1ULL << 12) | 453 | #define BIC_CPU_c7 (1ULL << 12) |
410 | #define BIC_CoreCnt (1ULL << 13) | 454 | #define BIC_ThreadC (1ULL << 13) |
411 | #define BIC_PkgTmp (1ULL << 14) | 455 | #define BIC_CoreTmp (1ULL << 14) |
412 | #define BIC_GFX_rc6 (1ULL << 15) | 456 | #define BIC_CoreCnt (1ULL << 15) |
413 | #define BIC_GFXMHz (1ULL << 16) | 457 | #define BIC_PkgTmp (1ULL << 16) |
414 | #define BIC_Pkgpc2 (1ULL << 17) | 458 | #define BIC_GFX_rc6 (1ULL << 17) |
415 | #define BIC_Pkgpc3 (1ULL << 18) | 459 | #define BIC_GFXMHz (1ULL << 18) |
416 | #define BIC_Pkgpc6 (1ULL << 19) | 460 | #define BIC_Pkgpc2 (1ULL << 19) |
417 | #define BIC_Pkgpc7 (1ULL << 20) | 461 | #define BIC_Pkgpc3 (1ULL << 20) |
418 | #define BIC_Pkgpc8 (1ULL << 21) | 462 | #define BIC_Pkgpc6 (1ULL << 21) |
419 | #define BIC_Pkgpc9 (1ULL << 22) | 463 | #define BIC_Pkgpc7 (1ULL << 22) |
420 | #define BIC_Pkgpc10 (1ULL << 23) | 464 | #define BIC_Pkgpc8 (1ULL << 23) |
421 | #define BIC_PkgWatt (1ULL << 24) | 465 | #define BIC_Pkgpc9 (1ULL << 24) |
422 | #define BIC_CorWatt (1ULL << 25) | 466 | #define BIC_Pkgpc10 (1ULL << 25) |
423 | #define BIC_GFXWatt (1ULL << 26) | 467 | #define BIC_CPU_LPI (1ULL << 26) |
424 | #define BIC_PkgCnt (1ULL << 27) | 468 | #define BIC_SYS_LPI (1ULL << 27) |
425 | #define BIC_RAMWatt (1ULL << 28) | 469 | #define BIC_PkgWatt (1ULL << 26) |
426 | #define BIC_PKG__ (1ULL << 29) | 470 | #define BIC_CorWatt (1ULL << 27) |
427 | #define BIC_RAM__ (1ULL << 30) | 471 | #define BIC_GFXWatt (1ULL << 28) |
428 | #define BIC_Pkg_J (1ULL << 31) | 472 | #define BIC_PkgCnt (1ULL << 29) |
429 | #define BIC_Cor_J (1ULL << 32) | 473 | #define BIC_RAMWatt (1ULL << 30) |
430 | #define BIC_GFX_J (1ULL << 33) | 474 | #define BIC_PKG__ (1ULL << 31) |
431 | #define BIC_RAM_J (1ULL << 34) | 475 | #define BIC_RAM__ (1ULL << 32) |
432 | #define BIC_Core (1ULL << 35) | 476 | #define BIC_Pkg_J (1ULL << 33) |
433 | #define BIC_CPU (1ULL << 36) | 477 | #define BIC_Cor_J (1ULL << 34) |
434 | #define BIC_Mod_c6 (1ULL << 37) | 478 | #define BIC_GFX_J (1ULL << 35) |
435 | #define BIC_sysfs (1ULL << 38) | 479 | #define BIC_RAM_J (1ULL << 36) |
436 | #define BIC_Totl_c0 (1ULL << 39) | 480 | #define BIC_Core (1ULL << 37) |
437 | #define BIC_Any_c0 (1ULL << 40) | 481 | #define BIC_CPU (1ULL << 38) |
438 | #define BIC_GFX_c0 (1ULL << 41) | 482 | #define BIC_Mod_c6 (1ULL << 39) |
439 | #define BIC_CPUGFX (1ULL << 42) | 483 | #define BIC_sysfs (1ULL << 40) |
440 | 484 | #define BIC_Totl_c0 (1ULL << 41) | |
441 | unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; | 485 | #define BIC_Any_c0 (1ULL << 42) |
442 | unsigned long long bic_present = BIC_sysfs; | 486 | #define BIC_GFX_c0 (1ULL << 43) |
487 | #define BIC_CPUGFX (1ULL << 44) | ||
488 | #define BIC_Node (1ULL << 45) | ||
489 | |||
490 | #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) | ||
491 | |||
492 | unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); | ||
493 | unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; | ||
443 | 494 | ||
444 | #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) | 495 | #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) |
496 | #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) | ||
445 | #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) | 497 | #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) |
446 | #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) | 498 | #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) |
447 | 499 | ||
500 | |||
448 | #define MAX_DEFERRED 16 | 501 | #define MAX_DEFERRED 16 |
449 | char *deferred_skip_names[MAX_DEFERRED]; | 502 | char *deferred_skip_names[MAX_DEFERRED]; |
450 | int deferred_skip_index; | 503 | int deferred_skip_index; |
@@ -469,9 +522,10 @@ void help(void) | |||
469 | "--cpu cpu-set limit output to summary plus cpu-set:\n" | 522 | "--cpu cpu-set limit output to summary plus cpu-set:\n" |
470 | " {core | package | j,k,l..m,n-p }\n" | 523 | " {core | package | j,k,l..m,n-p }\n" |
471 | "--quiet skip decoding system configuration header\n" | 524 | "--quiet skip decoding system configuration header\n" |
472 | "--interval sec Override default 5-second measurement interval\n" | 525 | "--interval sec.subsec Override default 5-second measurement interval\n" |
473 | "--help print this help message\n" | 526 | "--help print this help message\n" |
474 | "--list list column headers only\n" | 527 | "--list list column headers only\n" |
528 | "--num_iterations num number of the measurement iterations\n" | ||
475 | "--out file create or truncate \"file\" for all output\n" | 529 | "--out file create or truncate \"file\" for all output\n" |
476 | "--version print version information\n" | 530 | "--version print version information\n" |
477 | "\n" | 531 | "\n" |
@@ -496,6 +550,9 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) | |||
496 | if (comma) | 550 | if (comma) |
497 | *comma = '\0'; | 551 | *comma = '\0'; |
498 | 552 | ||
553 | if (!strcmp(name_list, "all")) | ||
554 | return ~0; | ||
555 | |||
499 | for (i = 0; i < MAX_BIC; ++i) { | 556 | for (i = 0; i < MAX_BIC; ++i) { |
500 | if (!strcmp(name_list, bic[i].name)) { | 557 | if (!strcmp(name_list, bic[i].name)) { |
501 | retval |= (1ULL << i); | 558 | retval |= (1ULL << i); |
@@ -532,10 +589,14 @@ void print_header(char *delim) | |||
532 | struct msr_counter *mp; | 589 | struct msr_counter *mp; |
533 | int printed = 0; | 590 | int printed = 0; |
534 | 591 | ||
535 | if (debug) | 592 | if (DO_BIC(BIC_USEC)) |
536 | outp += sprintf(outp, "usec %s", delim); | 593 | outp += sprintf(outp, "%susec", (printed++ ? delim : "")); |
594 | if (DO_BIC(BIC_TOD)) | ||
595 | outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); | ||
537 | if (DO_BIC(BIC_Package)) | 596 | if (DO_BIC(BIC_Package)) |
538 | outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); | 597 | outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); |
598 | if (DO_BIC(BIC_Node)) | ||
599 | outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); | ||
539 | if (DO_BIC(BIC_Core)) | 600 | if (DO_BIC(BIC_Core)) |
540 | outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); | 601 | outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); |
541 | if (DO_BIC(BIC_CPU)) | 602 | if (DO_BIC(BIC_CPU)) |
@@ -576,7 +637,7 @@ void print_header(char *delim) | |||
576 | 637 | ||
577 | if (DO_BIC(BIC_CPU_c1)) | 638 | if (DO_BIC(BIC_CPU_c1)) |
578 | outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); | 639 | outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); |
579 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) | 640 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) |
580 | outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); | 641 | outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); |
581 | if (DO_BIC(BIC_CPU_c6)) | 642 | if (DO_BIC(BIC_CPU_c6)) |
582 | outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); | 643 | outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); |
@@ -635,6 +696,10 @@ void print_header(char *delim) | |||
635 | outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); | 696 | outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); |
636 | if (DO_BIC(BIC_Pkgpc10)) | 697 | if (DO_BIC(BIC_Pkgpc10)) |
637 | outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); | 698 | outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); |
699 | if (DO_BIC(BIC_CPU_LPI)) | ||
700 | outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); | ||
701 | if (DO_BIC(BIC_SYS_LPI)) | ||
702 | outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); | ||
638 | 703 | ||
639 | if (do_rapl && !rapl_joules) { | 704 | if (do_rapl && !rapl_joules) { |
640 | if (DO_BIC(BIC_PkgWatt)) | 705 | if (DO_BIC(BIC_PkgWatt)) |
@@ -739,6 +804,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
739 | outp += sprintf(outp, "pc8: %016llX\n", p->pc8); | 804 | outp += sprintf(outp, "pc8: %016llX\n", p->pc8); |
740 | outp += sprintf(outp, "pc9: %016llX\n", p->pc9); | 805 | outp += sprintf(outp, "pc9: %016llX\n", p->pc9); |
741 | outp += sprintf(outp, "pc10: %016llX\n", p->pc10); | 806 | outp += sprintf(outp, "pc10: %016llX\n", p->pc10); |
807 | outp += sprintf(outp, "pc10: %016llX\n", p->pc10); | ||
808 | outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); | ||
809 | outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); | ||
742 | outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); | 810 | outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); |
743 | outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); | 811 | outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); |
744 | outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); | 812 | outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); |
@@ -786,7 +854,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
786 | (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) | 854 | (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) |
787 | return 0; | 855 | return 0; |
788 | 856 | ||
789 | if (debug) { | 857 | if (DO_BIC(BIC_USEC)) { |
790 | /* on each row, print how many usec each timestamp took to gather */ | 858 | /* on each row, print how many usec each timestamp took to gather */ |
791 | struct timeval tv; | 859 | struct timeval tv; |
792 | 860 | ||
@@ -794,6 +862,10 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
794 | outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); | 862 | outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); |
795 | } | 863 | } |
796 | 864 | ||
865 | /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ | ||
866 | if (DO_BIC(BIC_TOD)) | ||
867 | outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); | ||
868 | |||
797 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; | 869 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; |
798 | 870 | ||
799 | tsc = t->tsc * tsc_tweak; | 871 | tsc = t->tsc * tsc_tweak; |
@@ -802,6 +874,8 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
802 | if (t == &average.threads) { | 874 | if (t == &average.threads) { |
803 | if (DO_BIC(BIC_Package)) | 875 | if (DO_BIC(BIC_Package)) |
804 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); | 876 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); |
877 | if (DO_BIC(BIC_Node)) | ||
878 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); | ||
805 | if (DO_BIC(BIC_Core)) | 879 | if (DO_BIC(BIC_Core)) |
806 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); | 880 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); |
807 | if (DO_BIC(BIC_CPU)) | 881 | if (DO_BIC(BIC_CPU)) |
@@ -813,6 +887,15 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
813 | else | 887 | else |
814 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); | 888 | outp += sprintf(outp, "%s-", (printed++ ? delim : "")); |
815 | } | 889 | } |
890 | if (DO_BIC(BIC_Node)) { | ||
891 | if (t) | ||
892 | outp += sprintf(outp, "%s%d", | ||
893 | (printed++ ? delim : ""), | ||
894 | cpus[t->cpu_id].physical_node_id); | ||
895 | else | ||
896 | outp += sprintf(outp, "%s-", | ||
897 | (printed++ ? delim : "")); | ||
898 | } | ||
816 | if (DO_BIC(BIC_Core)) { | 899 | if (DO_BIC(BIC_Core)) { |
817 | if (c) | 900 | if (c) |
818 | outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); | 901 | outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); |
@@ -882,7 +965,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
882 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 965 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
883 | goto done; | 966 | goto done; |
884 | 967 | ||
885 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) | 968 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) |
886 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); | 969 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); |
887 | if (DO_BIC(BIC_CPU_c6)) | 970 | if (DO_BIC(BIC_CPU_c6)) |
888 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); | 971 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); |
@@ -959,6 +1042,11 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
959 | if (DO_BIC(BIC_Pkgpc10)) | 1042 | if (DO_BIC(BIC_Pkgpc10)) |
960 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); | 1043 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); |
961 | 1044 | ||
1045 | if (DO_BIC(BIC_CPU_LPI)) | ||
1046 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); | ||
1047 | if (DO_BIC(BIC_SYS_LPI)) | ||
1048 | outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); | ||
1049 | |||
962 | /* | 1050 | /* |
963 | * If measurement interval exceeds minimum RAPL Joule Counter range, | 1051 | * If measurement interval exceeds minimum RAPL Joule Counter range, |
964 | * indicate that results are suspect by printing "**" in fraction place. | 1052 | * indicate that results are suspect by printing "**" in fraction place. |
@@ -1006,7 +1094,8 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
1006 | } | 1094 | } |
1007 | 1095 | ||
1008 | done: | 1096 | done: |
1009 | outp += sprintf(outp, "\n"); | 1097 | if (*(outp - 1) != '\n') |
1098 | outp += sprintf(outp, "\n"); | ||
1010 | 1099 | ||
1011 | return 0; | 1100 | return 0; |
1012 | } | 1101 | } |
@@ -1083,6 +1172,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
1083 | old->pc8 = new->pc8 - old->pc8; | 1172 | old->pc8 = new->pc8 - old->pc8; |
1084 | old->pc9 = new->pc9 - old->pc9; | 1173 | old->pc9 = new->pc9 - old->pc9; |
1085 | old->pc10 = new->pc10 - old->pc10; | 1174 | old->pc10 = new->pc10 - old->pc10; |
1175 | old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; | ||
1176 | old->sys_lpi = new->sys_lpi - old->sys_lpi; | ||
1086 | old->pkg_temp_c = new->pkg_temp_c; | 1177 | old->pkg_temp_c = new->pkg_temp_c; |
1087 | 1178 | ||
1088 | /* flag an error when rc6 counter resets/wraps */ | 1179 | /* flag an error when rc6 counter resets/wraps */ |
@@ -1140,6 +1231,15 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
1140 | int i; | 1231 | int i; |
1141 | struct msr_counter *mp; | 1232 | struct msr_counter *mp; |
1142 | 1233 | ||
1234 | /* | ||
1235 | * the timestamps from start of measurement interval are in "old" | ||
1236 | * the timestamp from end of measurement interval are in "new" | ||
1237 | * over-write old w/ new so we can print end of interval values | ||
1238 | */ | ||
1239 | |||
1240 | old->tv_begin = new->tv_begin; | ||
1241 | old->tv_end = new->tv_end; | ||
1242 | |||
1143 | old->tsc = new->tsc - old->tsc; | 1243 | old->tsc = new->tsc - old->tsc; |
1144 | 1244 | ||
1145 | /* check for TSC < 1 Mcycles over interval */ | 1245 | /* check for TSC < 1 Mcycles over interval */ |
@@ -1228,6 +1328,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
1228 | int i; | 1328 | int i; |
1229 | struct msr_counter *mp; | 1329 | struct msr_counter *mp; |
1230 | 1330 | ||
1331 | t->tv_begin.tv_sec = 0; | ||
1332 | t->tv_begin.tv_usec = 0; | ||
1333 | t->tv_end.tv_sec = 0; | ||
1334 | t->tv_end.tv_usec = 0; | ||
1335 | |||
1231 | t->tsc = 0; | 1336 | t->tsc = 0; |
1232 | t->aperf = 0; | 1337 | t->aperf = 0; |
1233 | t->mperf = 0; | 1338 | t->mperf = 0; |
@@ -1260,6 +1365,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
1260 | p->pc8 = 0; | 1365 | p->pc8 = 0; |
1261 | p->pc9 = 0; | 1366 | p->pc9 = 0; |
1262 | p->pc10 = 0; | 1367 | p->pc10 = 0; |
1368 | p->cpu_lpi = 0; | ||
1369 | p->sys_lpi = 0; | ||
1263 | 1370 | ||
1264 | p->energy_pkg = 0; | 1371 | p->energy_pkg = 0; |
1265 | p->energy_dram = 0; | 1372 | p->energy_dram = 0; |
@@ -1286,6 +1393,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
1286 | int i; | 1393 | int i; |
1287 | struct msr_counter *mp; | 1394 | struct msr_counter *mp; |
1288 | 1395 | ||
1396 | /* remember first tv_begin */ | ||
1397 | if (average.threads.tv_begin.tv_sec == 0) | ||
1398 | average.threads.tv_begin = t->tv_begin; | ||
1399 | |||
1400 | /* remember last tv_end */ | ||
1401 | average.threads.tv_end = t->tv_end; | ||
1402 | |||
1289 | average.threads.tsc += t->tsc; | 1403 | average.threads.tsc += t->tsc; |
1290 | average.threads.aperf += t->aperf; | 1404 | average.threads.aperf += t->aperf; |
1291 | average.threads.mperf += t->mperf; | 1405 | average.threads.mperf += t->mperf; |
@@ -1341,6 +1455,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
1341 | average.packages.pc9 += p->pc9; | 1455 | average.packages.pc9 += p->pc9; |
1342 | average.packages.pc10 += p->pc10; | 1456 | average.packages.pc10 += p->pc10; |
1343 | 1457 | ||
1458 | average.packages.cpu_lpi = p->cpu_lpi; | ||
1459 | average.packages.sys_lpi = p->sys_lpi; | ||
1460 | |||
1344 | average.packages.energy_pkg += p->energy_pkg; | 1461 | average.packages.energy_pkg += p->energy_pkg; |
1345 | average.packages.energy_dram += p->energy_dram; | 1462 | average.packages.energy_dram += p->energy_dram; |
1346 | average.packages.energy_cores += p->energy_cores; | 1463 | average.packages.energy_cores += p->energy_cores; |
@@ -1487,7 +1604,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) | |||
1487 | if (get_msr(cpu, mp->msr_num, counterp)) | 1604 | if (get_msr(cpu, mp->msr_num, counterp)) |
1488 | return -1; | 1605 | return -1; |
1489 | } else { | 1606 | } else { |
1490 | char path[128]; | 1607 | char path[128 + PATH_BYTES]; |
1491 | 1608 | ||
1492 | if (mp->flags & SYSFS_PERCPU) { | 1609 | if (mp->flags & SYSFS_PERCPU) { |
1493 | sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", | 1610 | sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", |
@@ -1603,7 +1720,7 @@ retry: | |||
1603 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 1720 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
1604 | goto done; | 1721 | goto done; |
1605 | 1722 | ||
1606 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { | 1723 | if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { |
1607 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | 1724 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) |
1608 | return -6; | 1725 | return -6; |
1609 | } | 1726 | } |
@@ -1684,6 +1801,11 @@ retry: | |||
1684 | if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) | 1801 | if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) |
1685 | return -13; | 1802 | return -13; |
1686 | 1803 | ||
1804 | if (DO_BIC(BIC_CPU_LPI)) | ||
1805 | p->cpu_lpi = cpuidle_cur_cpu_lpi_us; | ||
1806 | if (DO_BIC(BIC_SYS_LPI)) | ||
1807 | p->sys_lpi = cpuidle_cur_sys_lpi_us; | ||
1808 | |||
1687 | if (do_rapl & RAPL_PKG) { | 1809 | if (do_rapl & RAPL_PKG) { |
1688 | if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) | 1810 | if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) |
1689 | return -13; | 1811 | return -13; |
@@ -1769,7 +1891,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, | |||
1769 | int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; | 1891 | int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1770 | int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; | 1892 | int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1771 | int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; | 1893 | int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1772 | int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; | 1894 | int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1773 | 1895 | ||
1774 | 1896 | ||
1775 | static void | 1897 | static void |
@@ -2071,12 +2193,9 @@ dump_nhm_cst_cfg(void) | |||
2071 | 2193 | ||
2072 | get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); | 2194 | get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); |
2073 | 2195 | ||
2074 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
2075 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
2076 | |||
2077 | fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); | 2196 | fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); |
2078 | 2197 | ||
2079 | fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", | 2198 | fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", |
2080 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | 2199 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", |
2081 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | 2200 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", |
2082 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | 2201 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", |
@@ -2084,6 +2203,15 @@ dump_nhm_cst_cfg(void) | |||
2084 | (msr & (1 << 15)) ? "" : "UN", | 2203 | (msr & (1 << 15)) ? "" : "UN", |
2085 | (unsigned int)msr & 0xF, | 2204 | (unsigned int)msr & 0xF, |
2086 | pkg_cstate_limit_strings[pkg_cstate_limit]); | 2205 | pkg_cstate_limit_strings[pkg_cstate_limit]); |
2206 | |||
2207 | #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) | ||
2208 | if (has_automatic_cstate_conversion) { | ||
2209 | fprintf(outf, ", automatic c-state conversion=%s", | ||
2210 | (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); | ||
2211 | } | ||
2212 | |||
2213 | fprintf(outf, ")\n"); | ||
2214 | |||
2087 | return; | 2215 | return; |
2088 | } | 2216 | } |
2089 | 2217 | ||
@@ -2184,6 +2312,8 @@ void free_fd_percpu(void) | |||
2184 | 2312 | ||
2185 | void free_all_buffers(void) | 2313 | void free_all_buffers(void) |
2186 | { | 2314 | { |
2315 | int i; | ||
2316 | |||
2187 | CPU_FREE(cpu_present_set); | 2317 | CPU_FREE(cpu_present_set); |
2188 | cpu_present_set = NULL; | 2318 | cpu_present_set = NULL; |
2189 | cpu_present_setsize = 0; | 2319 | cpu_present_setsize = 0; |
@@ -2216,6 +2346,12 @@ void free_all_buffers(void) | |||
2216 | 2346 | ||
2217 | free(irq_column_2_cpu); | 2347 | free(irq_column_2_cpu); |
2218 | free(irqs_per_cpu); | 2348 | free(irqs_per_cpu); |
2349 | |||
2350 | for (i = 0; i <= topo.max_cpu_num; ++i) { | ||
2351 | if (cpus[i].put_ids) | ||
2352 | CPU_FREE(cpus[i].put_ids); | ||
2353 | } | ||
2354 | free(cpus); | ||
2219 | } | 2355 | } |
2220 | 2356 | ||
2221 | 2357 | ||
@@ -2240,44 +2376,6 @@ int parse_int_file(const char *fmt, ...) | |||
2240 | } | 2376 | } |
2241 | 2377 | ||
2242 | /* | 2378 | /* |
2243 | * get_cpu_position_in_core(cpu) | ||
2244 | * return the position of the CPU among its HT siblings in the core | ||
2245 | * return -1 if the sibling is not in list | ||
2246 | */ | ||
2247 | int get_cpu_position_in_core(int cpu) | ||
2248 | { | ||
2249 | char path[64]; | ||
2250 | FILE *filep; | ||
2251 | int this_cpu; | ||
2252 | char character; | ||
2253 | int i; | ||
2254 | |||
2255 | sprintf(path, | ||
2256 | "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", | ||
2257 | cpu); | ||
2258 | filep = fopen(path, "r"); | ||
2259 | if (filep == NULL) { | ||
2260 | perror(path); | ||
2261 | exit(1); | ||
2262 | } | ||
2263 | |||
2264 | for (i = 0; i < topo.num_threads_per_core; i++) { | ||
2265 | fscanf(filep, "%d", &this_cpu); | ||
2266 | if (this_cpu == cpu) { | ||
2267 | fclose(filep); | ||
2268 | return i; | ||
2269 | } | ||
2270 | |||
2271 | /* Account for no separator after last thread*/ | ||
2272 | if (i != (topo.num_threads_per_core - 1)) | ||
2273 | fscanf(filep, "%c", &character); | ||
2274 | } | ||
2275 | |||
2276 | fclose(filep); | ||
2277 | return -1; | ||
2278 | } | ||
2279 | |||
2280 | /* | ||
2281 | * cpu_is_first_core_in_package(cpu) | 2379 | * cpu_is_first_core_in_package(cpu) |
2282 | * return 1 if given CPU is 1st core in package | 2380 | * return 1 if given CPU is 1st core in package |
2283 | */ | 2381 | */ |
@@ -2296,35 +2394,115 @@ int get_core_id(int cpu) | |||
2296 | return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); | 2394 | return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); |
2297 | } | 2395 | } |
2298 | 2396 | ||
2299 | int get_num_ht_siblings(int cpu) | 2397 | void set_node_data(void) |
2300 | { | 2398 | { |
2301 | char path[80]; | 2399 | char path[80]; |
2302 | FILE *filep; | 2400 | FILE *filep; |
2303 | int sib1; | 2401 | int pkg, node, cpu; |
2304 | int matches = 0; | ||
2305 | char character; | ||
2306 | char str[100]; | ||
2307 | char *ch; | ||
2308 | 2402 | ||
2309 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | 2403 | struct pkg_node_info { |
2310 | filep = fopen_or_die(path, "r"); | 2404 | int count; |
2405 | int min; | ||
2406 | } *pni; | ||
2311 | 2407 | ||
2312 | /* | 2408 | pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); |
2313 | * file format: | 2409 | if (!pni) |
2314 | * A ',' separated or '-' separated set of numbers | 2410 | err(1, "calloc pkg_node_count"); |
2315 | * (eg 1-2 or 1,3,4,5) | 2411 | |
2316 | */ | 2412 | for (pkg = 0; pkg < topo.num_packages; pkg++) |
2317 | fscanf(filep, "%d%c\n", &sib1, &character); | 2413 | pni[pkg].min = topo.num_cpus; |
2318 | fseek(filep, 0, SEEK_SET); | 2414 | |
2319 | fgets(str, 100, filep); | 2415 | for (node = 0; node <= topo.max_node_num; node++) { |
2320 | ch = strchr(str, character); | 2416 | /* find the "first" cpu in the node */ |
2321 | while (ch != NULL) { | 2417 | sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); |
2322 | matches++; | 2418 | filep = fopen(path, "r"); |
2323 | ch = strchr(ch+1, character); | 2419 | if (!filep) |
2420 | continue; | ||
2421 | fscanf(filep, "%d", &cpu); | ||
2422 | fclose(filep); | ||
2423 | |||
2424 | pkg = cpus[cpu].physical_package_id; | ||
2425 | pni[pkg].count++; | ||
2426 | |||
2427 | if (node < pni[pkg].min) | ||
2428 | pni[pkg].min = node; | ||
2324 | } | 2429 | } |
2325 | 2430 | ||
2431 | for (pkg = 0; pkg < topo.num_packages; pkg++) | ||
2432 | if (pni[pkg].count > topo.nodes_per_pkg) | ||
2433 | topo.nodes_per_pkg = pni[0].count; | ||
2434 | |||
2435 | for (cpu = 0; cpu < topo.num_cpus; cpu++) { | ||
2436 | pkg = cpus[cpu].physical_package_id; | ||
2437 | node = cpus[cpu].physical_node_id; | ||
2438 | cpus[cpu].logical_node_id = node - pni[pkg].min; | ||
2439 | } | ||
2440 | free(pni); | ||
2441 | |||
2442 | } | ||
2443 | |||
2444 | int get_physical_node_id(struct cpu_topology *thiscpu) | ||
2445 | { | ||
2446 | char path[80]; | ||
2447 | FILE *filep; | ||
2448 | int i; | ||
2449 | int cpu = thiscpu->logical_cpu_id; | ||
2450 | |||
2451 | for (i = 0; i <= topo.max_cpu_num; i++) { | ||
2452 | sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", | ||
2453 | cpu, i); | ||
2454 | filep = fopen(path, "r"); | ||
2455 | if (!filep) | ||
2456 | continue; | ||
2457 | fclose(filep); | ||
2458 | return i; | ||
2459 | } | ||
2460 | return -1; | ||
2461 | } | ||
2462 | |||
2463 | int get_thread_siblings(struct cpu_topology *thiscpu) | ||
2464 | { | ||
2465 | char path[80], character; | ||
2466 | FILE *filep; | ||
2467 | unsigned long map; | ||
2468 | int so, shift, sib_core; | ||
2469 | int cpu = thiscpu->logical_cpu_id; | ||
2470 | int offset = topo.max_cpu_num + 1; | ||
2471 | size_t size; | ||
2472 | int thread_id = 0; | ||
2473 | |||
2474 | thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
2475 | if (thiscpu->thread_id < 0) | ||
2476 | thiscpu->thread_id = thread_id++; | ||
2477 | if (!thiscpu->put_ids) | ||
2478 | return -1; | ||
2479 | |||
2480 | size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
2481 | CPU_ZERO_S(size, thiscpu->put_ids); | ||
2482 | |||
2483 | sprintf(path, | ||
2484 | "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); | ||
2485 | filep = fopen_or_die(path, "r"); | ||
2486 | do { | ||
2487 | offset -= BITMASK_SIZE; | ||
2488 | fscanf(filep, "%lx%c", &map, &character); | ||
2489 | for (shift = 0; shift < BITMASK_SIZE; shift++) { | ||
2490 | if ((map >> shift) & 0x1) { | ||
2491 | so = shift + offset; | ||
2492 | sib_core = get_core_id(so); | ||
2493 | if (sib_core == thiscpu->physical_core_id) { | ||
2494 | CPU_SET_S(so, size, thiscpu->put_ids); | ||
2495 | if ((so != cpu) && | ||
2496 | (cpus[so].thread_id < 0)) | ||
2497 | cpus[so].thread_id = | ||
2498 | thread_id++; | ||
2499 | } | ||
2500 | } | ||
2501 | } | ||
2502 | } while (!strncmp(&character, ",", 1)); | ||
2326 | fclose(filep); | 2503 | fclose(filep); |
2327 | return matches+1; | 2504 | |
2505 | return CPU_COUNT_S(size, thiscpu->put_ids); | ||
2328 | } | 2506 | } |
2329 | 2507 | ||
2330 | /* | 2508 | /* |
@@ -2339,32 +2517,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, | |||
2339 | struct thread_data *thread_base2, struct core_data *core_base2, | 2517 | struct thread_data *thread_base2, struct core_data *core_base2, |
2340 | struct pkg_data *pkg_base2) | 2518 | struct pkg_data *pkg_base2) |
2341 | { | 2519 | { |
2342 | int retval, pkg_no, core_no, thread_no; | 2520 | int retval, pkg_no, node_no, core_no, thread_no; |
2343 | 2521 | ||
2344 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | 2522 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
2345 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | 2523 | for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { |
2346 | for (thread_no = 0; thread_no < | 2524 | for (core_no = 0; core_no < topo.cores_per_node; |
2347 | topo.num_threads_per_core; ++thread_no) { | 2525 | ++core_no) { |
2348 | struct thread_data *t, *t2; | 2526 | for (thread_no = 0; thread_no < |
2349 | struct core_data *c, *c2; | 2527 | topo.threads_per_core; ++thread_no) { |
2350 | struct pkg_data *p, *p2; | 2528 | struct thread_data *t, *t2; |
2351 | 2529 | struct core_data *c, *c2; | |
2352 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | 2530 | struct pkg_data *p, *p2; |
2353 | 2531 | ||
2354 | if (cpu_is_not_present(t->cpu_id)) | 2532 | t = GET_THREAD(thread_base, thread_no, |
2355 | continue; | 2533 | core_no, node_no, |
2356 | 2534 | pkg_no); | |
2357 | t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); | 2535 | |
2358 | 2536 | if (cpu_is_not_present(t->cpu_id)) | |
2359 | c = GET_CORE(core_base, core_no, pkg_no); | 2537 | continue; |
2360 | c2 = GET_CORE(core_base2, core_no, pkg_no); | 2538 | |
2361 | 2539 | t2 = GET_THREAD(thread_base2, thread_no, | |
2362 | p = GET_PKG(pkg_base, pkg_no); | 2540 | core_no, node_no, |
2363 | p2 = GET_PKG(pkg_base2, pkg_no); | 2541 | pkg_no); |
2364 | 2542 | ||
2365 | retval = func(t, c, p, t2, c2, p2); | 2543 | c = GET_CORE(core_base, core_no, |
2366 | if (retval) | 2544 | node_no, pkg_no); |
2367 | return retval; | 2545 | c2 = GET_CORE(core_base2, core_no, |
2546 | node_no, | ||
2547 | pkg_no); | ||
2548 | |||
2549 | p = GET_PKG(pkg_base, pkg_no); | ||
2550 | p2 = GET_PKG(pkg_base2, pkg_no); | ||
2551 | |||
2552 | retval = func(t, c, p, t2, c2, p2); | ||
2553 | if (retval) | ||
2554 | return retval; | ||
2555 | } | ||
2368 | } | 2556 | } |
2369 | } | 2557 | } |
2370 | } | 2558 | } |
@@ -2409,6 +2597,20 @@ void re_initialize(void) | |||
2409 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); | 2597 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); |
2410 | } | 2598 | } |
2411 | 2599 | ||
2600 | void set_max_cpu_num(void) | ||
2601 | { | ||
2602 | FILE *filep; | ||
2603 | unsigned long dummy; | ||
2604 | |||
2605 | topo.max_cpu_num = 0; | ||
2606 | filep = fopen_or_die( | ||
2607 | "/sys/devices/system/cpu/cpu0/topology/thread_siblings", | ||
2608 | "r"); | ||
2609 | while (fscanf(filep, "%lx,", &dummy) == 1) | ||
2610 | topo.max_cpu_num += BITMASK_SIZE; | ||
2611 | fclose(filep); | ||
2612 | topo.max_cpu_num--; /* 0 based */ | ||
2613 | } | ||
2412 | 2614 | ||
2413 | /* | 2615 | /* |
2414 | * count_cpus() | 2616 | * count_cpus() |
@@ -2416,10 +2618,7 @@ void re_initialize(void) | |||
2416 | */ | 2618 | */ |
2417 | int count_cpus(int cpu) | 2619 | int count_cpus(int cpu) |
2418 | { | 2620 | { |
2419 | if (topo.max_cpu_num < cpu) | 2621 | topo.num_cpus++; |
2420 | topo.max_cpu_num = cpu; | ||
2421 | |||
2422 | topo.num_cpus += 1; | ||
2423 | return 0; | 2622 | return 0; |
2424 | } | 2623 | } |
2425 | int mark_cpu_present(int cpu) | 2624 | int mark_cpu_present(int cpu) |
@@ -2428,6 +2627,12 @@ int mark_cpu_present(int cpu) | |||
2428 | return 0; | 2627 | return 0; |
2429 | } | 2628 | } |
2430 | 2629 | ||
2630 | int init_thread_id(int cpu) | ||
2631 | { | ||
2632 | cpus[cpu].thread_id = -1; | ||
2633 | return 0; | ||
2634 | } | ||
2635 | |||
2431 | /* | 2636 | /* |
2432 | * snapshot_proc_interrupts() | 2637 | * snapshot_proc_interrupts() |
2433 | * | 2638 | * |
@@ -2542,6 +2747,52 @@ int snapshot_gfx_mhz(void) | |||
2542 | } | 2747 | } |
2543 | 2748 | ||
2544 | /* | 2749 | /* |
2750 | * snapshot_cpu_lpi() | ||
2751 | * | ||
2752 | * record snapshot of | ||
2753 | * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us | ||
2754 | * | ||
2755 | * return 1 if config change requires a restart, else return 0 | ||
2756 | */ | ||
2757 | int snapshot_cpu_lpi_us(void) | ||
2758 | { | ||
2759 | FILE *fp; | ||
2760 | int retval; | ||
2761 | |||
2762 | fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); | ||
2763 | |||
2764 | retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); | ||
2765 | if (retval != 1) | ||
2766 | err(1, "CPU LPI"); | ||
2767 | |||
2768 | fclose(fp); | ||
2769 | |||
2770 | return 0; | ||
2771 | } | ||
2772 | /* | ||
2773 | * snapshot_sys_lpi() | ||
2774 | * | ||
2775 | * record snapshot of | ||
2776 | * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us | ||
2777 | * | ||
2778 | * return 1 if config change requires a restart, else return 0 | ||
2779 | */ | ||
2780 | int snapshot_sys_lpi_us(void) | ||
2781 | { | ||
2782 | FILE *fp; | ||
2783 | int retval; | ||
2784 | |||
2785 | fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); | ||
2786 | |||
2787 | retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); | ||
2788 | if (retval != 1) | ||
2789 | err(1, "SYS LPI"); | ||
2790 | |||
2791 | fclose(fp); | ||
2792 | |||
2793 | return 0; | ||
2794 | } | ||
2795 | /* | ||
2545 | * snapshot /proc and /sys files | 2796 | * snapshot /proc and /sys files |
2546 | * | 2797 | * |
2547 | * return 1 if configuration restart needed, else return 0 | 2798 | * return 1 if configuration restart needed, else return 0 |
@@ -2558,13 +2809,83 @@ int snapshot_proc_sysfs_files(void) | |||
2558 | if (DO_BIC(BIC_GFXMHz)) | 2809 | if (DO_BIC(BIC_GFXMHz)) |
2559 | snapshot_gfx_mhz(); | 2810 | snapshot_gfx_mhz(); |
2560 | 2811 | ||
2812 | if (DO_BIC(BIC_CPU_LPI)) | ||
2813 | snapshot_cpu_lpi_us(); | ||
2814 | |||
2815 | if (DO_BIC(BIC_SYS_LPI)) | ||
2816 | snapshot_sys_lpi_us(); | ||
2817 | |||
2561 | return 0; | 2818 | return 0; |
2562 | } | 2819 | } |
2563 | 2820 | ||
2821 | int exit_requested; | ||
2822 | |||
2823 | static void signal_handler (int signal) | ||
2824 | { | ||
2825 | switch (signal) { | ||
2826 | case SIGINT: | ||
2827 | exit_requested = 1; | ||
2828 | if (debug) | ||
2829 | fprintf(stderr, " SIGINT\n"); | ||
2830 | break; | ||
2831 | case SIGUSR1: | ||
2832 | if (debug > 1) | ||
2833 | fprintf(stderr, "SIGUSR1\n"); | ||
2834 | break; | ||
2835 | } | ||
2836 | /* make sure this manually-invoked interval is at least 1ms long */ | ||
2837 | nanosleep(&one_msec, NULL); | ||
2838 | } | ||
2839 | |||
2840 | void setup_signal_handler(void) | ||
2841 | { | ||
2842 | struct sigaction sa; | ||
2843 | |||
2844 | memset(&sa, 0, sizeof(sa)); | ||
2845 | |||
2846 | sa.sa_handler = &signal_handler; | ||
2847 | |||
2848 | if (sigaction(SIGINT, &sa, NULL) < 0) | ||
2849 | err(1, "sigaction SIGINT"); | ||
2850 | if (sigaction(SIGUSR1, &sa, NULL) < 0) | ||
2851 | err(1, "sigaction SIGUSR1"); | ||
2852 | } | ||
2853 | |||
2854 | void do_sleep(void) | ||
2855 | { | ||
2856 | struct timeval select_timeout; | ||
2857 | fd_set readfds; | ||
2858 | int retval; | ||
2859 | |||
2860 | FD_ZERO(&readfds); | ||
2861 | FD_SET(0, &readfds); | ||
2862 | |||
2863 | if (!isatty(fileno(stdin))) { | ||
2864 | nanosleep(&interval_ts, NULL); | ||
2865 | return; | ||
2866 | } | ||
2867 | |||
2868 | select_timeout = interval_tv; | ||
2869 | retval = select(1, &readfds, NULL, NULL, &select_timeout); | ||
2870 | |||
2871 | if (retval == 1) { | ||
2872 | switch (getc(stdin)) { | ||
2873 | case 'q': | ||
2874 | exit_requested = 1; | ||
2875 | break; | ||
2876 | } | ||
2877 | /* make sure this manually-invoked interval is at least 1ms long */ | ||
2878 | nanosleep(&one_msec, NULL); | ||
2879 | } | ||
2880 | } | ||
2881 | |||
2564 | void turbostat_loop() | 2882 | void turbostat_loop() |
2565 | { | 2883 | { |
2566 | int retval; | 2884 | int retval; |
2567 | int restarted = 0; | 2885 | int restarted = 0; |
2886 | int done_iters = 0; | ||
2887 | |||
2888 | setup_signal_handler(); | ||
2568 | 2889 | ||
2569 | restart: | 2890 | restart: |
2570 | restarted++; | 2891 | restarted++; |
@@ -2581,6 +2902,7 @@ restart: | |||
2581 | goto restart; | 2902 | goto restart; |
2582 | } | 2903 | } |
2583 | restarted = 0; | 2904 | restarted = 0; |
2905 | done_iters = 0; | ||
2584 | gettimeofday(&tv_even, (struct timezone *)NULL); | 2906 | gettimeofday(&tv_even, (struct timezone *)NULL); |
2585 | 2907 | ||
2586 | while (1) { | 2908 | while (1) { |
@@ -2588,7 +2910,7 @@ restart: | |||
2588 | re_initialize(); | 2910 | re_initialize(); |
2589 | goto restart; | 2911 | goto restart; |
2590 | } | 2912 | } |
2591 | nanosleep(&interval_ts, NULL); | 2913 | do_sleep(); |
2592 | if (snapshot_proc_sysfs_files()) | 2914 | if (snapshot_proc_sysfs_files()) |
2593 | goto restart; | 2915 | goto restart; |
2594 | retval = for_all_cpus(get_counters, ODD_COUNTERS); | 2916 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
@@ -2607,7 +2929,11 @@ restart: | |||
2607 | compute_average(EVEN_COUNTERS); | 2929 | compute_average(EVEN_COUNTERS); |
2608 | format_all_counters(EVEN_COUNTERS); | 2930 | format_all_counters(EVEN_COUNTERS); |
2609 | flush_output_stdout(); | 2931 | flush_output_stdout(); |
2610 | nanosleep(&interval_ts, NULL); | 2932 | if (exit_requested) |
2933 | break; | ||
2934 | if (num_iterations && ++done_iters >= num_iterations) | ||
2935 | break; | ||
2936 | do_sleep(); | ||
2611 | if (snapshot_proc_sysfs_files()) | 2937 | if (snapshot_proc_sysfs_files()) |
2612 | goto restart; | 2938 | goto restart; |
2613 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 2939 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
@@ -2626,6 +2952,10 @@ restart: | |||
2626 | compute_average(ODD_COUNTERS); | 2952 | compute_average(ODD_COUNTERS); |
2627 | format_all_counters(ODD_COUNTERS); | 2953 | format_all_counters(ODD_COUNTERS); |
2628 | flush_output_stdout(); | 2954 | flush_output_stdout(); |
2955 | if (exit_requested) | ||
2956 | break; | ||
2957 | if (num_iterations && ++done_iters >= num_iterations) | ||
2958 | break; | ||
2629 | } | 2959 | } |
2630 | } | 2960 | } |
2631 | 2961 | ||
@@ -2740,6 +3070,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | |||
2740 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 3070 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
2741 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 3071 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
2742 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 3072 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
3073 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
2743 | pkg_cstate_limits = hsw_pkg_cstate_limits; | 3074 | pkg_cstate_limits = hsw_pkg_cstate_limits; |
2744 | has_misc_feature_control = 1; | 3075 | has_misc_feature_control = 1; |
2745 | break; | 3076 | break; |
@@ -2945,6 +3276,7 @@ int has_config_tdp(unsigned int family, unsigned int model) | |||
2945 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 3276 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
2946 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 3277 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
2947 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 3278 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
3279 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
2948 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ | 3280 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ |
2949 | 3281 | ||
2950 | case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ | 3282 | case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ |
@@ -3399,6 +3731,7 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
3399 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 3731 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
3400 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 3732 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
3401 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 3733 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
3734 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3402 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; | 3735 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; |
3403 | BIC_PRESENT(BIC_PKG__); | 3736 | BIC_PRESENT(BIC_PKG__); |
3404 | BIC_PRESENT(BIC_RAM__); | 3737 | BIC_PRESENT(BIC_RAM__); |
@@ -3523,6 +3856,12 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) | |||
3523 | } | 3856 | } |
3524 | } | 3857 | } |
3525 | 3858 | ||
3859 | void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) | ||
3860 | { | ||
3861 | if (is_skx(family, model) || is_bdx(family, model)) | ||
3862 | has_automatic_cstate_conversion = 1; | ||
3863 | } | ||
3864 | |||
3526 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 3865 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
3527 | { | 3866 | { |
3528 | unsigned long long msr; | 3867 | unsigned long long msr; |
@@ -3728,6 +4067,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) | |||
3728 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 4067 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
3729 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 4068 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
3730 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 4069 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
4070 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3731 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ | 4071 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ |
3732 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | 4072 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3733 | case INTEL_FAM6_ATOM_GEMINI_LAKE: | 4073 | case INTEL_FAM6_ATOM_GEMINI_LAKE: |
@@ -3761,6 +4101,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) | |||
3761 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 4101 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
3762 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 4102 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
3763 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 4103 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
4104 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3764 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | 4105 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3765 | case INTEL_FAM6_ATOM_GEMINI_LAKE: | 4106 | case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3766 | return 1; | 4107 | return 1; |
@@ -3786,6 +4127,7 @@ int has_skl_msrs(unsigned int family, unsigned int model) | |||
3786 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ | 4127 | case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ |
3787 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ | 4128 | case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ |
3788 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 4129 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
4130 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3789 | return 1; | 4131 | return 1; |
3790 | } | 4132 | } |
3791 | return 0; | 4133 | return 0; |
@@ -3815,6 +4157,19 @@ int is_knl(unsigned int family, unsigned int model) | |||
3815 | return 0; | 4157 | return 0; |
3816 | } | 4158 | } |
3817 | 4159 | ||
4160 | int is_cnl(unsigned int family, unsigned int model) | ||
4161 | { | ||
4162 | if (!genuine_intel) | ||
4163 | return 0; | ||
4164 | |||
4165 | switch (model) { | ||
4166 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
4167 | return 1; | ||
4168 | } | ||
4169 | |||
4170 | return 0; | ||
4171 | } | ||
4172 | |||
3818 | unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) | 4173 | unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) |
3819 | { | 4174 | { |
3820 | if (is_knl(family, model)) | 4175 | if (is_knl(family, model)) |
@@ -3947,7 +4302,7 @@ void decode_misc_enable_msr(void) | |||
3947 | base_cpu, msr, | 4302 | base_cpu, msr, |
3948 | msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", | 4303 | msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", |
3949 | msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", | 4304 | msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", |
3950 | msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "", | 4305 | msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", |
3951 | msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", | 4306 | msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", |
3952 | msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); | 4307 | msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); |
3953 | } | 4308 | } |
@@ -4152,7 +4507,6 @@ void process_cpuid() | |||
4152 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | 4507 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
4153 | crystal_hz = 24000000; /* 24.0 MHz */ | 4508 | crystal_hz = 24000000; /* 24.0 MHz */ |
4154 | break; | 4509 | break; |
4155 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ | ||
4156 | case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ | 4510 | case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ |
4157 | crystal_hz = 25000000; /* 25.0 MHz */ | 4511 | crystal_hz = 25000000; /* 25.0 MHz */ |
4158 | break; | 4512 | break; |
@@ -4253,6 +4607,7 @@ void process_cpuid() | |||
4253 | } | 4607 | } |
4254 | do_slm_cstates = is_slm(family, model); | 4608 | do_slm_cstates = is_slm(family, model); |
4255 | do_knl_cstates = is_knl(family, model); | 4609 | do_knl_cstates = is_knl(family, model); |
4610 | do_cnl_cstates = is_cnl(family, model); | ||
4256 | 4611 | ||
4257 | if (!quiet) | 4612 | if (!quiet) |
4258 | decode_misc_pwr_mgmt_msr(); | 4613 | decode_misc_pwr_mgmt_msr(); |
@@ -4262,6 +4617,7 @@ void process_cpuid() | |||
4262 | 4617 | ||
4263 | rapl_probe(family, model); | 4618 | rapl_probe(family, model); |
4264 | perf_limit_reasons_probe(family, model); | 4619 | perf_limit_reasons_probe(family, model); |
4620 | automatic_cstate_conversion_probe(family, model); | ||
4265 | 4621 | ||
4266 | if (!quiet) | 4622 | if (!quiet) |
4267 | dump_cstate_pstate_config_info(family, model); | 4623 | dump_cstate_pstate_config_info(family, model); |
@@ -4280,6 +4636,16 @@ void process_cpuid() | |||
4280 | if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) | 4636 | if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) |
4281 | BIC_PRESENT(BIC_GFXMHz); | 4637 | BIC_PRESENT(BIC_GFXMHz); |
4282 | 4638 | ||
4639 | if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) | ||
4640 | BIC_PRESENT(BIC_CPU_LPI); | ||
4641 | else | ||
4642 | BIC_NOT_PRESENT(BIC_CPU_LPI); | ||
4643 | |||
4644 | if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK)) | ||
4645 | BIC_PRESENT(BIC_SYS_LPI); | ||
4646 | else | ||
4647 | BIC_NOT_PRESENT(BIC_SYS_LPI); | ||
4648 | |||
4283 | if (!quiet) | 4649 | if (!quiet) |
4284 | decode_misc_feature_control(); | 4650 | decode_misc_feature_control(); |
4285 | 4651 | ||
@@ -4310,14 +4676,10 @@ void topology_probe() | |||
4310 | int max_core_id = 0; | 4676 | int max_core_id = 0; |
4311 | int max_package_id = 0; | 4677 | int max_package_id = 0; |
4312 | int max_siblings = 0; | 4678 | int max_siblings = 0; |
4313 | struct cpu_topology { | ||
4314 | int core_id; | ||
4315 | int physical_package_id; | ||
4316 | } *cpus; | ||
4317 | 4679 | ||
4318 | /* Initialize num_cpus, max_cpu_num */ | 4680 | /* Initialize num_cpus, max_cpu_num */ |
4681 | set_max_cpu_num(); | ||
4319 | topo.num_cpus = 0; | 4682 | topo.num_cpus = 0; |
4320 | topo.max_cpu_num = 0; | ||
4321 | for_all_proc_cpus(count_cpus); | 4683 | for_all_proc_cpus(count_cpus); |
4322 | if (!summary_only && topo.num_cpus > 1) | 4684 | if (!summary_only && topo.num_cpus > 1) |
4323 | BIC_PRESENT(BIC_CPU); | 4685 | BIC_PRESENT(BIC_CPU); |
@@ -4357,6 +4719,7 @@ void topology_probe() | |||
4357 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | 4719 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); |
4358 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); | 4720 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); |
4359 | 4721 | ||
4722 | for_all_proc_cpus(init_thread_id); | ||
4360 | 4723 | ||
4361 | /* | 4724 | /* |
4362 | * For online cpus | 4725 | * For online cpus |
@@ -4370,26 +4733,45 @@ void topology_probe() | |||
4370 | fprintf(outf, "cpu%d NOT PRESENT\n", i); | 4733 | fprintf(outf, "cpu%d NOT PRESENT\n", i); |
4371 | continue; | 4734 | continue; |
4372 | } | 4735 | } |
4373 | cpus[i].core_id = get_core_id(i); | ||
4374 | if (cpus[i].core_id > max_core_id) | ||
4375 | max_core_id = cpus[i].core_id; | ||
4376 | 4736 | ||
4737 | cpus[i].logical_cpu_id = i; | ||
4738 | |||
4739 | /* get package information */ | ||
4377 | cpus[i].physical_package_id = get_physical_package_id(i); | 4740 | cpus[i].physical_package_id = get_physical_package_id(i); |
4378 | if (cpus[i].physical_package_id > max_package_id) | 4741 | if (cpus[i].physical_package_id > max_package_id) |
4379 | max_package_id = cpus[i].physical_package_id; | 4742 | max_package_id = cpus[i].physical_package_id; |
4380 | 4743 | ||
4381 | siblings = get_num_ht_siblings(i); | 4744 | /* get numa node information */ |
4745 | cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); | ||
4746 | if (cpus[i].physical_node_id > topo.max_node_num) | ||
4747 | topo.max_node_num = cpus[i].physical_node_id; | ||
4748 | |||
4749 | /* get core information */ | ||
4750 | cpus[i].physical_core_id = get_core_id(i); | ||
4751 | if (cpus[i].physical_core_id > max_core_id) | ||
4752 | max_core_id = cpus[i].physical_core_id; | ||
4753 | |||
4754 | /* get thread information */ | ||
4755 | siblings = get_thread_siblings(&cpus[i]); | ||
4382 | if (siblings > max_siblings) | 4756 | if (siblings > max_siblings) |
4383 | max_siblings = siblings; | 4757 | max_siblings = siblings; |
4758 | if (cpus[i].thread_id != -1) | ||
4759 | topo.num_cores++; | ||
4760 | |||
4384 | if (debug > 1) | 4761 | if (debug > 1) |
4385 | fprintf(outf, "cpu %d pkg %d core %d\n", | 4762 | fprintf(outf, |
4386 | i, cpus[i].physical_package_id, cpus[i].core_id); | 4763 | "cpu %d pkg %d node %d core %d thread %d\n", |
4764 | i, cpus[i].physical_package_id, | ||
4765 | cpus[i].physical_node_id, | ||
4766 | cpus[i].physical_core_id, | ||
4767 | cpus[i].thread_id); | ||
4387 | } | 4768 | } |
4388 | topo.num_cores_per_pkg = max_core_id + 1; | 4769 | |
4770 | topo.cores_per_node = max_core_id + 1; | ||
4389 | if (debug > 1) | 4771 | if (debug > 1) |
4390 | fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", | 4772 | fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", |
4391 | max_core_id, topo.num_cores_per_pkg); | 4773 | max_core_id, topo.cores_per_node); |
4392 | if (!summary_only && topo.num_cores_per_pkg > 1) | 4774 | if (!summary_only && topo.cores_per_node > 1) |
4393 | BIC_PRESENT(BIC_Core); | 4775 | BIC_PRESENT(BIC_Core); |
4394 | 4776 | ||
4395 | topo.num_packages = max_package_id + 1; | 4777 | topo.num_packages = max_package_id + 1; |
@@ -4399,33 +4781,38 @@ void topology_probe() | |||
4399 | if (!summary_only && topo.num_packages > 1) | 4781 | if (!summary_only && topo.num_packages > 1) |
4400 | BIC_PRESENT(BIC_Package); | 4782 | BIC_PRESENT(BIC_Package); |
4401 | 4783 | ||
4402 | topo.num_threads_per_core = max_siblings; | 4784 | set_node_data(); |
4403 | if (debug > 1) | 4785 | if (debug > 1) |
4404 | fprintf(outf, "max_siblings %d\n", max_siblings); | 4786 | fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); |
4787 | if (!summary_only && topo.nodes_per_pkg > 1) | ||
4788 | BIC_PRESENT(BIC_Node); | ||
4405 | 4789 | ||
4406 | free(cpus); | 4790 | topo.threads_per_core = max_siblings; |
4791 | if (debug > 1) | ||
4792 | fprintf(outf, "max_siblings %d\n", max_siblings); | ||
4407 | } | 4793 | } |
4408 | 4794 | ||
4409 | void | 4795 | void |
4410 | allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) | 4796 | allocate_counters(struct thread_data **t, struct core_data **c, |
4797 | struct pkg_data **p) | ||
4411 | { | 4798 | { |
4412 | int i; | 4799 | int i; |
4800 | int num_cores = topo.cores_per_node * topo.nodes_per_pkg * | ||
4801 | topo.num_packages; | ||
4802 | int num_threads = topo.threads_per_core * num_cores; | ||
4413 | 4803 | ||
4414 | *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * | 4804 | *t = calloc(num_threads, sizeof(struct thread_data)); |
4415 | topo.num_packages, sizeof(struct thread_data)); | ||
4416 | if (*t == NULL) | 4805 | if (*t == NULL) |
4417 | goto error; | 4806 | goto error; |
4418 | 4807 | ||
4419 | for (i = 0; i < topo.num_threads_per_core * | 4808 | for (i = 0; i < num_threads; i++) |
4420 | topo.num_cores_per_pkg * topo.num_packages; i++) | ||
4421 | (*t)[i].cpu_id = -1; | 4809 | (*t)[i].cpu_id = -1; |
4422 | 4810 | ||
4423 | *c = calloc(topo.num_cores_per_pkg * topo.num_packages, | 4811 | *c = calloc(num_cores, sizeof(struct core_data)); |
4424 | sizeof(struct core_data)); | ||
4425 | if (*c == NULL) | 4812 | if (*c == NULL) |
4426 | goto error; | 4813 | goto error; |
4427 | 4814 | ||
4428 | for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) | 4815 | for (i = 0; i < num_cores; i++) |
4429 | (*c)[i].core_id = -1; | 4816 | (*c)[i].core_id = -1; |
4430 | 4817 | ||
4431 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); | 4818 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); |
@@ -4442,47 +4829,39 @@ error: | |||
4442 | /* | 4829 | /* |
4443 | * init_counter() | 4830 | * init_counter() |
4444 | * | 4831 | * |
4445 | * set cpu_id, core_num, pkg_num | ||
4446 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE | 4832 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE |
4447 | * | ||
4448 | * increment topo.num_cores when 1st core in pkg seen | ||
4449 | */ | 4833 | */ |
4450 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, | 4834 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, |
4451 | struct pkg_data *pkg_base, int thread_num, int core_num, | 4835 | struct pkg_data *pkg_base, int cpu_id) |
4452 | int pkg_num, int cpu_id) | ||
4453 | { | 4836 | { |
4837 | int pkg_id = cpus[cpu_id].physical_package_id; | ||
4838 | int node_id = cpus[cpu_id].logical_node_id; | ||
4839 | int core_id = cpus[cpu_id].physical_core_id; | ||
4840 | int thread_id = cpus[cpu_id].thread_id; | ||
4454 | struct thread_data *t; | 4841 | struct thread_data *t; |
4455 | struct core_data *c; | 4842 | struct core_data *c; |
4456 | struct pkg_data *p; | 4843 | struct pkg_data *p; |
4457 | 4844 | ||
4458 | t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); | 4845 | t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); |
4459 | c = GET_CORE(core_base, core_num, pkg_num); | 4846 | c = GET_CORE(core_base, core_id, node_id, pkg_id); |
4460 | p = GET_PKG(pkg_base, pkg_num); | 4847 | p = GET_PKG(pkg_base, pkg_id); |
4461 | 4848 | ||
4462 | t->cpu_id = cpu_id; | 4849 | t->cpu_id = cpu_id; |
4463 | if (thread_num == 0) { | 4850 | if (thread_id == 0) { |
4464 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; | 4851 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; |
4465 | if (cpu_is_first_core_in_package(cpu_id)) | 4852 | if (cpu_is_first_core_in_package(cpu_id)) |
4466 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; | 4853 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; |
4467 | } | 4854 | } |
4468 | 4855 | ||
4469 | c->core_id = core_num; | 4856 | c->core_id = core_id; |
4470 | p->package_id = pkg_num; | 4857 | p->package_id = pkg_id; |
4471 | } | 4858 | } |
4472 | 4859 | ||
4473 | 4860 | ||
4474 | int initialize_counters(int cpu_id) | 4861 | int initialize_counters(int cpu_id) |
4475 | { | 4862 | { |
4476 | int my_thread_id, my_core_id, my_package_id; | 4863 | init_counter(EVEN_COUNTERS, cpu_id); |
4477 | 4864 | init_counter(ODD_COUNTERS, cpu_id); | |
4478 | my_package_id = get_physical_package_id(cpu_id); | ||
4479 | my_core_id = get_core_id(cpu_id); | ||
4480 | my_thread_id = get_cpu_position_in_core(cpu_id); | ||
4481 | if (!my_thread_id) | ||
4482 | topo.num_cores++; | ||
4483 | |||
4484 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
4485 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
4486 | return 0; | 4865 | return 0; |
4487 | } | 4866 | } |
4488 | 4867 | ||
@@ -4630,7 +5009,7 @@ int get_and_dump_counters(void) | |||
4630 | } | 5009 | } |
4631 | 5010 | ||
4632 | void print_version() { | 5011 | void print_version() { |
4633 | fprintf(outf, "turbostat version 17.06.23" | 5012 | fprintf(outf, "turbostat version 18.06.01" |
4634 | " - Len Brown <lenb@kernel.org>\n"); | 5013 | " - Len Brown <lenb@kernel.org>\n"); |
4635 | } | 5014 | } |
4636 | 5015 | ||
@@ -4661,7 +5040,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, | |||
4661 | msrp->next = sys.tp; | 5040 | msrp->next = sys.tp; |
4662 | sys.tp = msrp; | 5041 | sys.tp = msrp; |
4663 | sys.added_thread_counters++; | 5042 | sys.added_thread_counters++; |
4664 | if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { | 5043 | if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { |
4665 | fprintf(stderr, "exceeded max %d added thread counters\n", | 5044 | fprintf(stderr, "exceeded max %d added thread counters\n", |
4666 | MAX_ADDED_COUNTERS); | 5045 | MAX_ADDED_COUNTERS); |
4667 | exit(-1); | 5046 | exit(-1); |
@@ -4820,7 +5199,7 @@ void probe_sysfs(void) | |||
4820 | if (!DO_BIC(BIC_sysfs)) | 5199 | if (!DO_BIC(BIC_sysfs)) |
4821 | return; | 5200 | return; |
4822 | 5201 | ||
4823 | for (state = 10; state > 0; --state) { | 5202 | for (state = 10; state >= 0; --state) { |
4824 | 5203 | ||
4825 | sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", | 5204 | sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", |
4826 | base_cpu, state); | 5205 | base_cpu, state); |
@@ -4847,7 +5226,7 @@ void probe_sysfs(void) | |||
4847 | FORMAT_PERCENT, SYSFS_PERCPU); | 5226 | FORMAT_PERCENT, SYSFS_PERCPU); |
4848 | } | 5227 | } |
4849 | 5228 | ||
4850 | for (state = 10; state > 0; --state) { | 5229 | for (state = 10; state >= 0; --state) { |
4851 | 5230 | ||
4852 | sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", | 5231 | sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", |
4853 | base_cpu, state); | 5232 | base_cpu, state); |
@@ -4960,34 +5339,6 @@ error: | |||
4960 | exit(-1); | 5339 | exit(-1); |
4961 | } | 5340 | } |
4962 | 5341 | ||
4963 | int shown; | ||
4964 | /* | ||
4965 | * parse_show_hide() - process cmdline to set default counter action | ||
4966 | */ | ||
4967 | void parse_show_hide(char *optarg, enum show_hide_mode new_mode) | ||
4968 | { | ||
4969 | /* | ||
4970 | * --show: show only those specified | ||
4971 | * The 1st invocation will clear and replace the enabled mask | ||
4972 | * subsequent invocations can add to it. | ||
4973 | */ | ||
4974 | if (new_mode == SHOW_LIST) { | ||
4975 | if (shown == 0) | ||
4976 | bic_enabled = bic_lookup(optarg, new_mode); | ||
4977 | else | ||
4978 | bic_enabled |= bic_lookup(optarg, new_mode); | ||
4979 | shown = 1; | ||
4980 | |||
4981 | return; | ||
4982 | } | ||
4983 | |||
4984 | /* | ||
4985 | * --hide: do not show those specified | ||
4986 | * multiple invocations simply clear more bits in enabled mask | ||
4987 | */ | ||
4988 | bic_enabled &= ~bic_lookup(optarg, new_mode); | ||
4989 | |||
4990 | } | ||
4991 | 5342 | ||
4992 | void cmdline(int argc, char **argv) | 5343 | void cmdline(int argc, char **argv) |
4993 | { | 5344 | { |
@@ -4998,7 +5349,9 @@ void cmdline(int argc, char **argv) | |||
4998 | {"cpu", required_argument, 0, 'c'}, | 5349 | {"cpu", required_argument, 0, 'c'}, |
4999 | {"Dump", no_argument, 0, 'D'}, | 5350 | {"Dump", no_argument, 0, 'D'}, |
5000 | {"debug", no_argument, 0, 'd'}, /* internal, not documented */ | 5351 | {"debug", no_argument, 0, 'd'}, /* internal, not documented */ |
5352 | {"enable", required_argument, 0, 'e'}, | ||
5001 | {"interval", required_argument, 0, 'i'}, | 5353 | {"interval", required_argument, 0, 'i'}, |
5354 | {"num_iterations", required_argument, 0, 'n'}, | ||
5002 | {"help", no_argument, 0, 'h'}, | 5355 | {"help", no_argument, 0, 'h'}, |
5003 | {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help | 5356 | {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help |
5004 | {"Joules", no_argument, 0, 'J'}, | 5357 | {"Joules", no_argument, 0, 'J'}, |
@@ -5014,7 +5367,7 @@ void cmdline(int argc, char **argv) | |||
5014 | 5367 | ||
5015 | progname = argv[0]; | 5368 | progname = argv[0]; |
5016 | 5369 | ||
5017 | while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", | 5370 | while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", |
5018 | long_options, &option_index)) != -1) { | 5371 | long_options, &option_index)) != -1) { |
5019 | switch (opt) { | 5372 | switch (opt) { |
5020 | case 'a': | 5373 | case 'a': |
@@ -5026,11 +5379,20 @@ void cmdline(int argc, char **argv) | |||
5026 | case 'D': | 5379 | case 'D': |
5027 | dump_only++; | 5380 | dump_only++; |
5028 | break; | 5381 | break; |
5382 | case 'e': | ||
5383 | /* --enable specified counter */ | ||
5384 | bic_enabled |= bic_lookup(optarg, SHOW_LIST); | ||
5385 | break; | ||
5029 | case 'd': | 5386 | case 'd': |
5030 | debug++; | 5387 | debug++; |
5388 | ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); | ||
5031 | break; | 5389 | break; |
5032 | case 'H': | 5390 | case 'H': |
5033 | parse_show_hide(optarg, HIDE_LIST); | 5391 | /* |
5392 | * --hide: do not show those specified | ||
5393 | * multiple invocations simply clear more bits in enabled mask | ||
5394 | */ | ||
5395 | bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); | ||
5034 | break; | 5396 | break; |
5035 | case 'h': | 5397 | case 'h': |
5036 | default: | 5398 | default: |
@@ -5046,7 +5408,8 @@ void cmdline(int argc, char **argv) | |||
5046 | exit(2); | 5408 | exit(2); |
5047 | } | 5409 | } |
5048 | 5410 | ||
5049 | interval_ts.tv_sec = interval; | 5411 | interval_tv.tv_sec = interval_ts.tv_sec = interval; |
5412 | interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; | ||
5050 | interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; | 5413 | interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; |
5051 | } | 5414 | } |
5052 | break; | 5415 | break; |
@@ -5054,6 +5417,7 @@ void cmdline(int argc, char **argv) | |||
5054 | rapl_joules++; | 5417 | rapl_joules++; |
5055 | break; | 5418 | break; |
5056 | case 'l': | 5419 | case 'l': |
5420 | ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); | ||
5057 | list_header_only++; | 5421 | list_header_only++; |
5058 | quiet++; | 5422 | quiet++; |
5059 | break; | 5423 | break; |
@@ -5063,8 +5427,26 @@ void cmdline(int argc, char **argv) | |||
5063 | case 'q': | 5427 | case 'q': |
5064 | quiet = 1; | 5428 | quiet = 1; |
5065 | break; | 5429 | break; |
5430 | case 'n': | ||
5431 | num_iterations = strtod(optarg, NULL); | ||
5432 | |||
5433 | if (num_iterations <= 0) { | ||
5434 | fprintf(outf, "iterations %d should be positive number\n", | ||
5435 | num_iterations); | ||
5436 | exit(2); | ||
5437 | } | ||
5438 | break; | ||
5066 | case 's': | 5439 | case 's': |
5067 | parse_show_hide(optarg, SHOW_LIST); | 5440 | /* |
5441 | * --show: show only those specified | ||
5442 | * The 1st invocation will clear and replace the enabled mask | ||
5443 | * subsequent invocations can add to it. | ||
5444 | */ | ||
5445 | if (shown == 0) | ||
5446 | bic_enabled = bic_lookup(optarg, SHOW_LIST); | ||
5447 | else | ||
5448 | bic_enabled |= bic_lookup(optarg, SHOW_LIST); | ||
5449 | shown = 1; | ||
5068 | break; | 5450 | break; |
5069 | case 'S': | 5451 | case 'S': |
5070 | summary_only++; | 5452 | summary_only++; |
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 2447b1bbaacf..f4534fb8b951 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile | |||
@@ -24,5 +24,5 @@ install : x86_energy_perf_policy | |||
24 | install -d $(DESTDIR)$(PREFIX)/bin | 24 | install -d $(DESTDIR)$(PREFIX)/bin |
25 | install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy | 25 | install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy |
26 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 | 26 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 |
27 | install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 | 27 | install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 |
28 | 28 | ||