aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-21 16:57:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-21 16:57:36 -0400
commit26c92a38cec99ee7b657901ff64bd01a96a2fb22 (patch)
tree8c5c51b76354a81544c9e1f15d29606f859113eb
parent1abd8a8f39cd9a2925149000056494523c85643a (diff)
parentb51e001385f1c861445af3efeab766811ee8db29 (diff)
Merge tag 'pm-4.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These are mostly fixes, including some fixes for changes made during the recent merge window and some "stable" material, plus some minor extensions of the turbostat utility. Specifics: - Fix the PM core to avoid introducing a runtime PM usage counter imbalance when adding device links during driver probe (Rafael Wysocki). - Fix the operating performance points (OPP) framework to ensure that the regulator voltage is always updated as appropriate when updating clock rates (Waldemar Rymarkiewicz). - Fix the intel_pstate driver to use correct max/min limits for cores with differing maximum frequencies (Srinivas Pandruvada). - Fix a typo in the intel_pstate driver documentation (Rafael Wysocki). - Fix two issues with the recently added Kryo cpufreq driver (Ilia Lin). - Fix two recent regressions and some other minor issues in the turbostat utility and extend it to provide some more diagnostic information (Len Brown, Nathan Ciobanu)" * tag 'pm-4.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: Documentation: intel_pstate: Fix typo tools/power turbostat: version 18.06.20 tools/power turbostat: add the missing command line switches tools/power turbostat: add single character tokens to help tools/power turbostat: alphabetize the help output tools/power turbostat: fix segfault on 'no node' machines tools/power turbostat: add optional APIC X2APIC columns tools/power turbostat: decode cpuid.1.HT tools/power turbostat: fix show/hide issues resulting from mis-merge PM / OPP: Update voltage in case freq == old_freq cpufreq: intel_pstate: Fix scaling max/min limits with Turbo 3.0 cpufreq: kryo: Add module remove and exit cpufreq: kryo: Fix possible error code dereference PM / core: Fix supplier device runtime PM usage counter imbalance
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst2
-rw-r--r--drivers/base/core.c15
-rw-r--r--drivers/cpufreq/intel_pstate.c27
-rw-r--r--drivers/cpufreq/qcom-cpufreq-kryo.c25
-rw-r--r--drivers/opp/core.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.82
-rw-r--r--tools/power/x86/turbostat/turbostat.c240
7 files changed, 224 insertions, 89 deletions
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ab2fe0eda1d7..8b9164990956 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -410,7 +410,7 @@ argument is passed to the kernel in the command line.
410 That only is supported in some configurations, though (for example, if 410 That only is supported in some configurations, though (for example, if
411 the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, 411 the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
412 the operation mode of the driver cannot be changed), and if it is not 412 the operation mode of the driver cannot be changed), and if it is not
413 supported in the current configuration, writes to this attribute with 413 supported in the current configuration, writes to this attribute will
414 fail with an appropriate error. 414 fail with an appropriate error.
415 415
416Interpretation of Policy Attributes 416Interpretation of Policy Attributes
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 36622b52e419..df3e1a44707a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer,
236 link->rpm_active = true; 236 link->rpm_active = true;
237 } 237 }
238 pm_runtime_new_link(consumer); 238 pm_runtime_new_link(consumer);
239 /*
240 * If the link is being added by the consumer driver at probe
241 * time, balance the decrementation of the supplier's runtime PM
242 * usage counter after consumer probe in driver_probe_device().
243 */
244 if (consumer->links.status == DL_DEV_PROBING)
245 pm_runtime_get_noresume(supplier);
239 } 246 }
240 get_device(supplier); 247 get_device(supplier);
241 link->supplier = supplier; 248 link->supplier = supplier;
@@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer,
255 switch (consumer->links.status) { 262 switch (consumer->links.status) {
256 case DL_DEV_PROBING: 263 case DL_DEV_PROBING:
257 /* 264 /*
258 * Balance the decrementation of the supplier's 265 * Some callers expect the link creation during
259 * runtime PM usage counter after consumer probe 266 * consumer driver probe to resume the supplier
260 * in driver_probe_device(). 267 * even without DL_FLAG_RPM_ACTIVE.
261 */ 268 */
262 if (flags & DL_FLAG_PM_RUNTIME) 269 if (flags & DL_FLAG_PM_RUNTIME)
263 pm_runtime_get_sync(supplier); 270 pm_runtime_resume(supplier);
264 271
265 link->status = DL_STATE_CONSUMER_PROBE; 272 link->status = DL_STATE_CONSUMER_PROBE;
266 break; 273 break;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1de5ec8d5ea3..ece120da3353 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -294,6 +294,7 @@ struct pstate_funcs {
294static struct pstate_funcs pstate_funcs __read_mostly; 294static struct pstate_funcs pstate_funcs __read_mostly;
295 295
296static int hwp_active __read_mostly; 296static int hwp_active __read_mostly;
297static int hwp_mode_bdw __read_mostly;
297static bool per_cpu_limits __read_mostly; 298static bool per_cpu_limits __read_mostly;
298static bool hwp_boost __read_mostly; 299static bool hwp_boost __read_mostly;
299 300
@@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1413 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); 1414 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
1414 cpu->pstate.scaling = pstate_funcs.get_scaling(); 1415 cpu->pstate.scaling = pstate_funcs.get_scaling();
1415 cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; 1416 cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
1416 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; 1417
1418 if (hwp_active && !hwp_mode_bdw) {
1419 unsigned int phy_max, current_max;
1420
1421 intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
1422 cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
1423 } else {
1424 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1425 }
1417 1426
1418 if (pstate_funcs.get_aperf_mperf_shift) 1427 if (pstate_funcs.get_aperf_mperf_shift)
1419 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); 1428 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
2467static inline void intel_pstate_request_control_from_smm(void) {} 2476static inline void intel_pstate_request_control_from_smm(void) {}
2468#endif /* CONFIG_ACPI */ 2477#endif /* CONFIG_ACPI */
2469 2478
2479#define INTEL_PSTATE_HWP_BROADWELL 0x01
2480
2481#define ICPU_HWP(model, hwp_mode) \
2482 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
2483
2470static const struct x86_cpu_id hwp_support_ids[] __initconst = { 2484static const struct x86_cpu_id hwp_support_ids[] __initconst = {
2471 { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, 2485 ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
2486 ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
2487 ICPU_HWP(X86_MODEL_ANY, 0),
2472 {} 2488 {}
2473}; 2489};
2474 2490
2475static int __init intel_pstate_init(void) 2491static int __init intel_pstate_init(void)
2476{ 2492{
2493 const struct x86_cpu_id *id;
2477 int rc; 2494 int rc;
2478 2495
2479 if (no_load) 2496 if (no_load)
2480 return -ENODEV; 2497 return -ENODEV;
2481 2498
2482 if (x86_match_cpu(hwp_support_ids)) { 2499 id = x86_match_cpu(hwp_support_ids);
2500 if (id) {
2483 copy_cpu_funcs(&core_funcs); 2501 copy_cpu_funcs(&core_funcs);
2484 if (!no_hwp) { 2502 if (!no_hwp) {
2485 hwp_active++; 2503 hwp_active++;
2504 hwp_mode_bdw = id->driver_data;
2486 intel_pstate.attr = hwp_cpufreq_attrs; 2505 intel_pstate.attr = hwp_cpufreq_attrs;
2487 goto hwp_cpu_matched; 2506 goto hwp_cpu_matched;
2488 } 2507 }
2489 } else { 2508 } else {
2490 const struct x86_cpu_id *id;
2491
2492 id = x86_match_cpu(intel_pstate_cpu_ids); 2509 id = x86_match_cpu(intel_pstate_cpu_ids);
2493 if (!id) 2510 if (!id)
2494 return -ENODEV; 2511 return -ENODEV;
diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
index d049fe4b80c4..01bddacf5c3b 100644
--- a/drivers/cpufreq/qcom-cpufreq-kryo.c
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -42,6 +42,8 @@ enum _msm8996_version {
42 NUM_OF_MSM8996_VERSIONS, 42 NUM_OF_MSM8996_VERSIONS,
43}; 43};
44 44
45struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
46
45static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) 47static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
46{ 48{
47 size_t len; 49 size_t len;
@@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
74static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) 76static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
75{ 77{
76 struct opp_table *opp_tables[NR_CPUS] = {0}; 78 struct opp_table *opp_tables[NR_CPUS] = {0};
77 struct platform_device *cpufreq_dt_pdev;
78 enum _msm8996_version msm8996_version; 79 enum _msm8996_version msm8996_version;
79 struct nvmem_cell *speedbin_nvmem; 80 struct nvmem_cell *speedbin_nvmem;
80 struct device_node *np; 81 struct device_node *np;
@@ -115,6 +116,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
115 116
116 speedbin = nvmem_cell_read(speedbin_nvmem, &len); 117 speedbin = nvmem_cell_read(speedbin_nvmem, &len);
117 nvmem_cell_put(speedbin_nvmem); 118 nvmem_cell_put(speedbin_nvmem);
119 if (IS_ERR(speedbin))
120 return PTR_ERR(speedbin);
118 121
119 switch (msm8996_version) { 122 switch (msm8996_version) {
120 case MSM8996_V3: 123 case MSM8996_V3:
@@ -127,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
127 BUG(); 130 BUG();
128 break; 131 break;
129 } 132 }
133 kfree(speedbin);
130 134
131 for_each_possible_cpu(cpu) { 135 for_each_possible_cpu(cpu) {
132 cpu_dev = get_cpu_device(cpu); 136 cpu_dev = get_cpu_device(cpu);
@@ -162,8 +166,15 @@ free_opp:
162 return ret; 166 return ret;
163} 167}
164 168
169static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
170{
171 platform_device_unregister(cpufreq_dt_pdev);
172 return 0;
173}
174
165static struct platform_driver qcom_cpufreq_kryo_driver = { 175static struct platform_driver qcom_cpufreq_kryo_driver = {
166 .probe = qcom_cpufreq_kryo_probe, 176 .probe = qcom_cpufreq_kryo_probe,
177 .remove = qcom_cpufreq_kryo_remove,
167 .driver = { 178 .driver = {
168 .name = "qcom-cpufreq-kryo", 179 .name = "qcom-cpufreq-kryo",
169 }, 180 },
@@ -198,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
198 if (unlikely(ret < 0)) 209 if (unlikely(ret < 0))
199 return ret; 210 return ret;
200 211
201 ret = PTR_ERR_OR_ZERO(platform_device_register_simple( 212 kryo_cpufreq_pdev = platform_device_register_simple(
202 "qcom-cpufreq-kryo", -1, NULL, 0)); 213 "qcom-cpufreq-kryo", -1, NULL, 0);
214 ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
203 if (0 == ret) 215 if (0 == ret)
204 return 0; 216 return 0;
205 217
@@ -208,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
208} 220}
209module_init(qcom_cpufreq_kryo_init); 221module_init(qcom_cpufreq_kryo_init);
210 222
223static void __init qcom_cpufreq_kryo_exit(void)
224{
225 platform_device_unregister(kryo_cpufreq_pdev);
226 platform_driver_unregister(&qcom_cpufreq_kryo_driver);
227}
228module_exit(qcom_cpufreq_kryo_exit);
229
211MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver"); 230MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
212MODULE_LICENSE("GPL v2"); 231MODULE_LICENSE("GPL v2");
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index ab2f3fead6b1..31ff03dbeb83 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table,
598 } 598 }
599 599
600 /* Scaling up? Scale voltage before frequency */ 600 /* Scaling up? Scale voltage before frequency */
601 if (freq > old_freq) { 601 if (freq >= old_freq) {
602 ret = _set_opp_voltage(dev, reg, new_supply); 602 ret = _set_opp_voltage(dev, reg, new_supply);
603 if (ret) 603 if (ret)
604 goto restore_voltage; 604 goto restore_voltage;
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ca9ef7017624..d39e4ff7d0bf 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary. Note that option
56.PP 56.PP
57\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. 57\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
58.PP 58.PP
59\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". 59\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
60The column name "all" can be used to enable all disabled-by-default built-in counters. 60The column name "all" can be used to enable all disabled-by-default built-in counters.
61.PP 61.PP
62\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. 62\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d6cff3070ebd..4d14bbbf9b63 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
109unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 109unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
110unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 110unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
111unsigned int has_misc_feature_control; 111unsigned int has_misc_feature_control;
112unsigned int first_counter_read = 1;
112 113
113#define RAPL_PKG (1 << 0) 114#define RAPL_PKG (1 << 0)
114 /* 0x610 MSR_PKG_POWER_LIMIT */ 115 /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -170,6 +171,8 @@ struct thread_data {
170 unsigned long long irq_count; 171 unsigned long long irq_count;
171 unsigned int smi_count; 172 unsigned int smi_count;
172 unsigned int cpu_id; 173 unsigned int cpu_id;
174 unsigned int apic_id;
175 unsigned int x2apic_id;
173 unsigned int flags; 176 unsigned int flags;
174#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 177#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
175#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 178#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
@@ -381,19 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
381} 384}
382 385
383/* 386/*
384 * Each string in this array is compared in --show and --hide cmdline. 387 * This list matches the column headers, except
385 * Thus, strings that are proper sub-sets must follow their more specific peers. 388 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
389 * 2. Core and CPU are moved to the end, we can't have strings that contain them
390 * matching on them for --show and --hide.
386 */ 391 */
387struct msr_counter bic[] = { 392struct msr_counter bic[] = {
388 { 0x0, "usec" }, 393 { 0x0, "usec" },
389 { 0x0, "Time_Of_Day_Seconds" }, 394 { 0x0, "Time_Of_Day_Seconds" },
390 { 0x0, "Package" }, 395 { 0x0, "Package" },
396 { 0x0, "Node" },
391 { 0x0, "Avg_MHz" }, 397 { 0x0, "Avg_MHz" },
398 { 0x0, "Busy%" },
392 { 0x0, "Bzy_MHz" }, 399 { 0x0, "Bzy_MHz" },
393 { 0x0, "TSC_MHz" }, 400 { 0x0, "TSC_MHz" },
394 { 0x0, "IRQ" }, 401 { 0x0, "IRQ" },
395 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, 402 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
396 { 0x0, "Busy%" }, 403 { 0x0, "sysfs" },
397 { 0x0, "CPU%c1" }, 404 { 0x0, "CPU%c1" },
398 { 0x0, "CPU%c3" }, 405 { 0x0, "CPU%c3" },
399 { 0x0, "CPU%c6" }, 406 { 0x0, "CPU%c6" },
@@ -424,73 +431,73 @@ struct msr_counter bic[] = {
424 { 0x0, "Cor_J" }, 431 { 0x0, "Cor_J" },
425 { 0x0, "GFX_J" }, 432 { 0x0, "GFX_J" },
426 { 0x0, "RAM_J" }, 433 { 0x0, "RAM_J" },
427 { 0x0, "Core" },
428 { 0x0, "CPU" },
429 { 0x0, "Mod%c6" }, 434 { 0x0, "Mod%c6" },
430 { 0x0, "sysfs" },
431 { 0x0, "Totl%C0" }, 435 { 0x0, "Totl%C0" },
432 { 0x0, "Any%C0" }, 436 { 0x0, "Any%C0" },
433 { 0x0, "GFX%C0" }, 437 { 0x0, "GFX%C0" },
434 { 0x0, "CPUGFX%" }, 438 { 0x0, "CPUGFX%" },
435 { 0x0, "Node%" }, 439 { 0x0, "Core" },
440 { 0x0, "CPU" },
441 { 0x0, "APIC" },
442 { 0x0, "X2APIC" },
436}; 443};
437 444
438
439
440#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 445#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
441#define BIC_USEC (1ULL << 0) 446#define BIC_USEC (1ULL << 0)
442#define BIC_TOD (1ULL << 1) 447#define BIC_TOD (1ULL << 1)
443#define BIC_Package (1ULL << 2) 448#define BIC_Package (1ULL << 2)
444#define BIC_Avg_MHz (1ULL << 3) 449#define BIC_Node (1ULL << 3)
445#define BIC_Bzy_MHz (1ULL << 4) 450#define BIC_Avg_MHz (1ULL << 4)
446#define BIC_TSC_MHz (1ULL << 5) 451#define BIC_Busy (1ULL << 5)
447#define BIC_IRQ (1ULL << 6) 452#define BIC_Bzy_MHz (1ULL << 6)
448#define BIC_SMI (1ULL << 7) 453#define BIC_TSC_MHz (1ULL << 7)
449#define BIC_Busy (1ULL << 8) 454#define BIC_IRQ (1ULL << 8)
450#define BIC_CPU_c1 (1ULL << 9) 455#define BIC_SMI (1ULL << 9)
451#define BIC_CPU_c3 (1ULL << 10) 456#define BIC_sysfs (1ULL << 10)
452#define BIC_CPU_c6 (1ULL << 11) 457#define BIC_CPU_c1 (1ULL << 11)
453#define BIC_CPU_c7 (1ULL << 12) 458#define BIC_CPU_c3 (1ULL << 12)
454#define BIC_ThreadC (1ULL << 13) 459#define BIC_CPU_c6 (1ULL << 13)
455#define BIC_CoreTmp (1ULL << 14) 460#define BIC_CPU_c7 (1ULL << 14)
456#define BIC_CoreCnt (1ULL << 15) 461#define BIC_ThreadC (1ULL << 15)
457#define BIC_PkgTmp (1ULL << 16) 462#define BIC_CoreTmp (1ULL << 16)
458#define BIC_GFX_rc6 (1ULL << 17) 463#define BIC_CoreCnt (1ULL << 17)
459#define BIC_GFXMHz (1ULL << 18) 464#define BIC_PkgTmp (1ULL << 18)
460#define BIC_Pkgpc2 (1ULL << 19) 465#define BIC_GFX_rc6 (1ULL << 19)
461#define BIC_Pkgpc3 (1ULL << 20) 466#define BIC_GFXMHz (1ULL << 20)
462#define BIC_Pkgpc6 (1ULL << 21) 467#define BIC_Pkgpc2 (1ULL << 21)
463#define BIC_Pkgpc7 (1ULL << 22) 468#define BIC_Pkgpc3 (1ULL << 22)
464#define BIC_Pkgpc8 (1ULL << 23) 469#define BIC_Pkgpc6 (1ULL << 23)
465#define BIC_Pkgpc9 (1ULL << 24) 470#define BIC_Pkgpc7 (1ULL << 24)
466#define BIC_Pkgpc10 (1ULL << 25) 471#define BIC_Pkgpc8 (1ULL << 25)
467#define BIC_CPU_LPI (1ULL << 26) 472#define BIC_Pkgpc9 (1ULL << 26)
468#define BIC_SYS_LPI (1ULL << 27) 473#define BIC_Pkgpc10 (1ULL << 27)
469#define BIC_PkgWatt (1ULL << 26) 474#define BIC_CPU_LPI (1ULL << 28)
470#define BIC_CorWatt (1ULL << 27) 475#define BIC_SYS_LPI (1ULL << 29)
471#define BIC_GFXWatt (1ULL << 28) 476#define BIC_PkgWatt (1ULL << 30)
472#define BIC_PkgCnt (1ULL << 29) 477#define BIC_CorWatt (1ULL << 31)
473#define BIC_RAMWatt (1ULL << 30) 478#define BIC_GFXWatt (1ULL << 32)
474#define BIC_PKG__ (1ULL << 31) 479#define BIC_PkgCnt (1ULL << 33)
475#define BIC_RAM__ (1ULL << 32) 480#define BIC_RAMWatt (1ULL << 34)
476#define BIC_Pkg_J (1ULL << 33) 481#define BIC_PKG__ (1ULL << 35)
477#define BIC_Cor_J (1ULL << 34) 482#define BIC_RAM__ (1ULL << 36)
478#define BIC_GFX_J (1ULL << 35) 483#define BIC_Pkg_J (1ULL << 37)
479#define BIC_RAM_J (1ULL << 36) 484#define BIC_Cor_J (1ULL << 38)
480#define BIC_Core (1ULL << 37) 485#define BIC_GFX_J (1ULL << 39)
481#define BIC_CPU (1ULL << 38) 486#define BIC_RAM_J (1ULL << 40)
482#define BIC_Mod_c6 (1ULL << 39) 487#define BIC_Mod_c6 (1ULL << 41)
483#define BIC_sysfs (1ULL << 40) 488#define BIC_Totl_c0 (1ULL << 42)
484#define BIC_Totl_c0 (1ULL << 41) 489#define BIC_Any_c0 (1ULL << 43)
485#define BIC_Any_c0 (1ULL << 42) 490#define BIC_GFX_c0 (1ULL << 44)
486#define BIC_GFX_c0 (1ULL << 43) 491#define BIC_CPUGFX (1ULL << 45)
487#define BIC_CPUGFX (1ULL << 44) 492#define BIC_Core (1ULL << 46)
488#define BIC_Node (1ULL << 45) 493#define BIC_CPU (1ULL << 47)
489 494#define BIC_APIC (1ULL << 48)
490#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) 495#define BIC_X2APIC (1ULL << 49)
496
497#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
491 498
492unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 499unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
493unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; 500unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
494 501
495#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 502#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
496#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 503#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
@@ -517,17 +524,34 @@ void help(void)
517 "when COMMAND completes.\n" 524 "when COMMAND completes.\n"
518 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 525 "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
519 "to print statistics, until interrupted.\n" 526 "to print statistics, until interrupted.\n"
520 "--add add a counter\n" 527 " -a, --add add a counter\n"
521 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 528 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
522 "--cpu cpu-set limit output to summary plus cpu-set:\n" 529 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
523 " {core | package | j,k,l..m,n-p }\n" 530 " {core | package | j,k,l..m,n-p }\n"
524 "--quiet skip decoding system configuration header\n" 531 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
525 "--interval sec.subsec Override default 5-second measurement interval\n" 532 " -D, --Dump displays the raw counter values\n"
526 "--help print this help message\n" 533 " -e, --enable [all | column]\n"
527 "--list list column headers only\n" 534 " shows all or the specified disabled column\n"
528 "--num_iterations num number of the measurement iterations\n" 535 " -H, --hide [column|column,column,...]\n"
529 "--out file create or truncate \"file\" for all output\n" 536 " hide the specified column(s)\n"
530 "--version print version information\n" 537 " -i, --interval sec.subsec\n"
538 " Override default 5-second measurement interval\n"
539 " -J, --Joules displays energy in Joules instead of Watts\n"
540 " -l, --list list column headers only\n"
541 " -n, --num_iterations num\n"
542 " number of the measurement iterations\n"
543 " -o, --out file\n"
544 " create or truncate \"file\" for all output\n"
545 " -q, --quiet skip decoding system configuration header\n"
546 " -s, --show [column|column,column,...]\n"
547 " show only the specified column(s)\n"
548 " -S, --Summary\n"
549 " limits output to 1-line system summary per interval\n"
550 " -T, --TCC temperature\n"
551 " sets the Thermal Control Circuit temperature in\n"
552 " degrees Celsius\n"
553 " -h, --help print this help message\n"
554 " -v, --version print version information\n"
531 "\n" 555 "\n"
532 "For more help, run \"man turbostat\"\n"); 556 "For more help, run \"man turbostat\"\n");
533} 557}
@@ -601,6 +625,10 @@ void print_header(char *delim)
601 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 625 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
602 if (DO_BIC(BIC_CPU)) 626 if (DO_BIC(BIC_CPU))
603 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 627 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
628 if (DO_BIC(BIC_APIC))
629 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
630 if (DO_BIC(BIC_X2APIC))
631 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
604 if (DO_BIC(BIC_Avg_MHz)) 632 if (DO_BIC(BIC_Avg_MHz))
605 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 633 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
606 if (DO_BIC(BIC_Busy)) 634 if (DO_BIC(BIC_Busy))
@@ -880,6 +908,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
880 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 908 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
881 if (DO_BIC(BIC_CPU)) 909 if (DO_BIC(BIC_CPU))
882 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 910 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
911 if (DO_BIC(BIC_APIC))
912 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
913 if (DO_BIC(BIC_X2APIC))
914 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
883 } else { 915 } else {
884 if (DO_BIC(BIC_Package)) { 916 if (DO_BIC(BIC_Package)) {
885 if (p) 917 if (p)
@@ -904,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
904 } 936 }
905 if (DO_BIC(BIC_CPU)) 937 if (DO_BIC(BIC_CPU))
906 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 938 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
939 if (DO_BIC(BIC_APIC))
940 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
941 if (DO_BIC(BIC_X2APIC))
942 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
907 } 943 }
908 944
909 if (DO_BIC(BIC_Avg_MHz)) 945 if (DO_BIC(BIC_Avg_MHz))
@@ -1231,6 +1267,12 @@ delta_thread(struct thread_data *new, struct thread_data *old,
1231 int i; 1267 int i;
1232 struct msr_counter *mp; 1268 struct msr_counter *mp;
1233 1269
1270 /* we run cpuid just the 1st time, copy the results */
1271 if (DO_BIC(BIC_APIC))
1272 new->apic_id = old->apic_id;
1273 if (DO_BIC(BIC_X2APIC))
1274 new->x2apic_id = old->x2apic_id;
1275
1234 /* 1276 /*
1235 * the timestamps from start of measurement interval are in "old" 1277 * the timestamps from start of measurement interval are in "old"
1236 * the timestamp from end of measurement interval are in "new" 1278 * the timestamp from end of measurement interval are in "new"
@@ -1393,6 +1435,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
1393 int i; 1435 int i;
1394 struct msr_counter *mp; 1436 struct msr_counter *mp;
1395 1437
1438 /* copy un-changing apic_id's */
1439 if (DO_BIC(BIC_APIC))
1440 average.threads.apic_id = t->apic_id;
1441 if (DO_BIC(BIC_X2APIC))
1442 average.threads.x2apic_id = t->x2apic_id;
1443
1396 /* remember first tv_begin */ 1444 /* remember first tv_begin */
1397 if (average.threads.tv_begin.tv_sec == 0) 1445 if (average.threads.tv_begin.tv_sec == 0)
1398 average.threads.tv_begin = t->tv_begin; 1446 average.threads.tv_begin = t->tv_begin;
@@ -1619,6 +1667,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1619 return 0; 1667 return 0;
1620} 1668}
1621 1669
1670void get_apic_id(struct thread_data *t)
1671{
1672 unsigned int eax, ebx, ecx, edx, max_level;
1673
1674 eax = ebx = ecx = edx = 0;
1675
1676 if (!genuine_intel)
1677 return;
1678
1679 __cpuid(0, max_level, ebx, ecx, edx);
1680
1681 __cpuid(1, eax, ebx, ecx, edx);
1682 t->apic_id = (ebx >> 24) & 0xf;
1683
1684 if (max_level < 0xb)
1685 return;
1686
1687 if (!DO_BIC(BIC_X2APIC))
1688 return;
1689
1690 ecx = 0;
1691 __cpuid(0xb, eax, ebx, ecx, edx);
1692 t->x2apic_id = edx;
1693
1694 if (debug && (t->apic_id != t->x2apic_id))
1695 fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
1696}
1697
1622/* 1698/*
1623 * get_counters(...) 1699 * get_counters(...)
1624 * migrate to cpu 1700 * migrate to cpu
@@ -1632,7 +1708,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1632 struct msr_counter *mp; 1708 struct msr_counter *mp;
1633 int i; 1709 int i;
1634 1710
1635
1636 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 1711 gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1637 1712
1638 if (cpu_migrate(cpu)) { 1713 if (cpu_migrate(cpu)) {
@@ -1640,6 +1715,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1640 return -1; 1715 return -1;
1641 } 1716 }
1642 1717
1718 if (first_counter_read)
1719 get_apic_id(t);
1643retry: 1720retry:
1644 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 1721 t->tsc = rdtsc(); /* we are running on local CPU of interest */
1645 1722
@@ -2432,6 +2509,12 @@ void set_node_data(void)
2432 if (pni[pkg].count > topo.nodes_per_pkg) 2509 if (pni[pkg].count > topo.nodes_per_pkg)
2433 topo.nodes_per_pkg = pni[0].count; 2510 topo.nodes_per_pkg = pni[0].count;
2434 2511
2512 /* Fake 1 node per pkg for machines that don't
2513 * expose nodes and thus avoid -nan results
2514 */
2515 if (topo.nodes_per_pkg == 0)
2516 topo.nodes_per_pkg = 1;
2517
2435 for (cpu = 0; cpu < topo.num_cpus; cpu++) { 2518 for (cpu = 0; cpu < topo.num_cpus; cpu++) {
2436 pkg = cpus[cpu].physical_package_id; 2519 pkg = cpus[cpu].physical_package_id;
2437 node = cpus[cpu].physical_node_id; 2520 node = cpus[cpu].physical_node_id;
@@ -2879,6 +2962,7 @@ void do_sleep(void)
2879 } 2962 }
2880} 2963}
2881 2964
2965
2882void turbostat_loop() 2966void turbostat_loop()
2883{ 2967{
2884 int retval; 2968 int retval;
@@ -2892,6 +2976,7 @@ restart:
2892 2976
2893 snapshot_proc_sysfs_files(); 2977 snapshot_proc_sysfs_files();
2894 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 2978 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2979 first_counter_read = 0;
2895 if (retval < -1) { 2980 if (retval < -1) {
2896 exit(retval); 2981 exit(retval);
2897 } else if (retval == -1) { 2982 } else if (retval == -1) {
@@ -4392,7 +4477,7 @@ void process_cpuid()
4392 if (!quiet) { 4477 if (!quiet) {
4393 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 4478 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4394 max_level, family, model, stepping, family, model, stepping); 4479 max_level, family, model, stepping, family, model, stepping);
4395 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", 4480 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4396 ecx & (1 << 0) ? "SSE3" : "-", 4481 ecx & (1 << 0) ? "SSE3" : "-",
4397 ecx & (1 << 3) ? "MONITOR" : "-", 4482 ecx & (1 << 3) ? "MONITOR" : "-",
4398 ecx & (1 << 6) ? "SMX" : "-", 4483 ecx & (1 << 6) ? "SMX" : "-",
@@ -4401,6 +4486,7 @@ void process_cpuid()
4401 edx & (1 << 4) ? "TSC" : "-", 4486 edx & (1 << 4) ? "TSC" : "-",
4402 edx & (1 << 5) ? "MSR" : "-", 4487 edx & (1 << 5) ? "MSR" : "-",
4403 edx & (1 << 22) ? "ACPI-TM" : "-", 4488 edx & (1 << 22) ? "ACPI-TM" : "-",
4489 edx & (1 << 28) ? "HT" : "-",
4404 edx & (1 << 29) ? "TM" : "-"); 4490 edx & (1 << 29) ? "TM" : "-");
4405 } 4491 }
4406 4492
@@ -4652,7 +4738,6 @@ void process_cpuid()
4652 return; 4738 return;
4653} 4739}
4654 4740
4655
4656/* 4741/*
4657 * in /dev/cpu/ return success for names that are numbers 4742 * in /dev/cpu/ return success for names that are numbers
4658 * ie. filter out ".", "..", "microcode". 4743 * ie. filter out ".", "..", "microcode".
@@ -4842,6 +4927,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4842 struct core_data *c; 4927 struct core_data *c;
4843 struct pkg_data *p; 4928 struct pkg_data *p;
4844 4929
4930
4931 /* Workaround for systems where physical_node_id==-1
4932 * and logical_node_id==(-1 - topo.num_cpus)
4933 */
4934 if (node_id < 0)
4935 node_id = 0;
4936
4845 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 4937 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4846 c = GET_CORE(core_base, core_id, node_id, pkg_id); 4938 c = GET_CORE(core_base, core_id, node_id, pkg_id);
4847 p = GET_PKG(pkg_base, pkg_id); 4939 p = GET_PKG(pkg_base, pkg_id);
@@ -4946,6 +5038,7 @@ int fork_it(char **argv)
4946 5038
4947 snapshot_proc_sysfs_files(); 5039 snapshot_proc_sysfs_files();
4948 status = for_all_cpus(get_counters, EVEN_COUNTERS); 5040 status = for_all_cpus(get_counters, EVEN_COUNTERS);
5041 first_counter_read = 0;
4949 if (status) 5042 if (status)
4950 exit(status); 5043 exit(status);
4951 /* clear affinity side-effect of get_counters() */ 5044 /* clear affinity side-effect of get_counters() */
@@ -5009,7 +5102,7 @@ int get_and_dump_counters(void)
5009} 5102}
5010 5103
5011void print_version() { 5104void print_version() {
5012 fprintf(outf, "turbostat version 18.06.01" 5105 fprintf(outf, "turbostat version 18.06.20"
5013 " - Len Brown <lenb@kernel.org>\n"); 5106 " - Len Brown <lenb@kernel.org>\n");
5014} 5107}
5015 5108
@@ -5381,7 +5474,7 @@ void cmdline(int argc, char **argv)
5381 break; 5474 break;
5382 case 'e': 5475 case 'e':
5383 /* --enable specified counter */ 5476 /* --enable specified counter */
5384 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 5477 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5385 break; 5478 break;
5386 case 'd': 5479 case 'd':
5387 debug++; 5480 debug++;
@@ -5465,7 +5558,6 @@ void cmdline(int argc, char **argv)
5465int main(int argc, char **argv) 5558int main(int argc, char **argv)
5466{ 5559{
5467 outf = stderr; 5560 outf = stderr;
5468
5469 cmdline(argc, argv); 5561 cmdline(argc, argv);
5470 5562
5471 if (!quiet) 5563 if (!quiet)