diff options
author | Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com> | 2015-05-20 12:49:34 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2015-05-27 18:03:57 -0400 |
commit | fb5d432722e186c656285ccc088e35dbe24f6fd1 (patch) | |
tree | ae8f87435f209fe4c21121e0bd42301814db617b /tools | |
parent | e275b3885dffd31095984ed2476ed0447fa7309a (diff) |
tools/power turbostat: enable turbostat to support Knights Landing (KNL)
Changes mainly to account for minor differences in Knights Landing (KNL):
1. KNL supports C1 and C6 core states.
2. KNL supports PC2, PC3 and PC6 package states.
3. KNL has a different encoding of the TURBO_RATIO_LIMIT MSR.
Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 105 |
1 files changed, 101 insertions, 4 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d85adbafbe60..256a5e1de381 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -52,6 +52,7 @@ unsigned int skip_c0; | |||
52 | unsigned int skip_c1; | 52 | unsigned int skip_c1; |
53 | unsigned int do_nhm_cstates; | 53 | unsigned int do_nhm_cstates; |
54 | unsigned int do_snb_cstates; | 54 | unsigned int do_snb_cstates; |
55 | unsigned int do_knl_cstates; | ||
55 | unsigned int do_pc2; | 56 | unsigned int do_pc2; |
56 | unsigned int do_pc3; | 57 | unsigned int do_pc3; |
57 | unsigned int do_pc6; | 58 | unsigned int do_pc6; |
@@ -316,7 +317,7 @@ void print_header(void) | |||
316 | 317 | ||
317 | if (do_nhm_cstates) | 318 | if (do_nhm_cstates) |
318 | outp += sprintf(outp, " CPU%%c1"); | 319 | outp += sprintf(outp, " CPU%%c1"); |
319 | if (do_nhm_cstates && !do_slm_cstates) | 320 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) |
320 | outp += sprintf(outp, " CPU%%c3"); | 321 | outp += sprintf(outp, " CPU%%c3"); |
321 | if (do_nhm_cstates) | 322 | if (do_nhm_cstates) |
322 | outp += sprintf(outp, " CPU%%c6"); | 323 | outp += sprintf(outp, " CPU%%c6"); |
@@ -546,7 +547,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
546 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 547 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
547 | goto done; | 548 | goto done; |
548 | 549 | ||
549 | if (do_nhm_cstates && !do_slm_cstates) | 550 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) |
550 | outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); | 551 | outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); |
551 | if (do_nhm_cstates) | 552 | if (do_nhm_cstates) |
552 | outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); | 553 | outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); |
@@ -1018,14 +1019,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1018 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 1019 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
1019 | return 0; | 1020 | return 0; |
1020 | 1021 | ||
1021 | if (do_nhm_cstates && !do_slm_cstates) { | 1022 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { |
1022 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | 1023 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) |
1023 | return -6; | 1024 | return -6; |
1024 | } | 1025 | } |
1025 | 1026 | ||
1026 | if (do_nhm_cstates) { | 1027 | if (do_nhm_cstates && !do_knl_cstates) { |
1027 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | 1028 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) |
1028 | return -7; | 1029 | return -7; |
1030 | } else if (do_knl_cstates) { | ||
1031 | if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) | ||
1032 | return -7; | ||
1029 | } | 1033 | } |
1030 | 1034 | ||
1031 | if (do_snb_cstates) | 1035 | if (do_snb_cstates) |
@@ -1296,6 +1300,67 @@ dump_nhm_turbo_ratio_limits(void) | |||
1296 | } | 1300 | } |
1297 | 1301 | ||
1298 | static void | 1302 | static void |
1303 | dump_knl_turbo_ratio_limits(void) | ||
1304 | { | ||
1305 | int cores; | ||
1306 | unsigned int ratio; | ||
1307 | unsigned long long msr; | ||
1308 | int delta_cores; | ||
1309 | int delta_ratio; | ||
1310 | int i; | ||
1311 | |||
1312 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | ||
1313 | |||
1314 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", | ||
1315 | msr); | ||
1316 | |||
1317 | /** | ||
1318 | * Turbo encoding in KNL is as follows: | ||
1319 | * [7:0] -- Base value of number of active cores of bucket 1. | ||
1320 | * [15:8] -- Base value of freq ratio of bucket 1. | ||
1321 | * [20:16] -- +ve delta of number of active cores of bucket 2. | ||
1322 | * i.e. active cores of bucket 2 = | ||
1323 | * active cores of bucket 1 + delta | ||
1324 | * [23:21] -- Negative delta of freq ratio of bucket 2. | ||
1325 | * i.e. freq ratio of bucket 2 = | ||
1326 | * freq ratio of bucket 1 - delta | ||
1327 | * [28:24]-- +ve delta of number of active cores of bucket 3. | ||
1328 | * [31:29]-- -ve delta of freq ratio of bucket 3. | ||
1329 | * [36:32]-- +ve delta of number of active cores of bucket 4. | ||
1330 | * [39:37]-- -ve delta of freq ratio of bucket 4. | ||
1331 | * [44:40]-- +ve delta of number of active cores of bucket 5. | ||
1332 | * [47:45]-- -ve delta of freq ratio of bucket 5. | ||
1333 | * [52:48]-- +ve delta of number of active cores of bucket 6. | ||
1334 | * [55:53]-- -ve delta of freq ratio of bucket 6. | ||
1335 | * [60:56]-- +ve delta of number of active cores of bucket 7. | ||
1336 | * [63:61]-- -ve delta of freq ratio of bucket 7. | ||
1337 | */ | ||
1338 | cores = msr & 0xFF; | ||
1339 | ratio = (msr >> 8) && 0xFF; | ||
1340 | if (ratio > 0) | ||
1341 | fprintf(stderr, | ||
1342 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1343 | ratio, bclk, ratio * bclk, cores); | ||
1344 | |||
1345 | for (i = 16; i < 64; i = i + 8) { | ||
1346 | delta_cores = (msr >> i) & 0x1F; | ||
1347 | delta_ratio = (msr >> (i + 5)) && 0x7; | ||
1348 | if (!delta_cores || !delta_ratio) | ||
1349 | return; | ||
1350 | cores = cores + delta_cores; | ||
1351 | ratio = ratio - delta_ratio; | ||
1352 | |||
1353 | /** -ve ratios will make successive ratio calculations | ||
1354 | * negative. Hence return instead of carrying on. | ||
1355 | */ | ||
1356 | if (ratio > 0) | ||
1357 | fprintf(stderr, | ||
1358 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1359 | ratio, bclk, ratio * bclk, cores); | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | static void | ||
1299 | dump_nhm_cst_cfg(void) | 1364 | dump_nhm_cst_cfg(void) |
1300 | { | 1365 | { |
1301 | unsigned long long msr; | 1366 | unsigned long long msr; |
@@ -1788,6 +1853,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1788 | } | 1853 | } |
1789 | } | 1854 | } |
1790 | 1855 | ||
1856 | int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) | ||
1857 | { | ||
1858 | if (!genuine_intel) | ||
1859 | return 0; | ||
1860 | |||
1861 | if (family != 6) | ||
1862 | return 0; | ||
1863 | |||
1864 | switch (model) { | ||
1865 | case 0x57: /* Knights Landing */ | ||
1866 | return 1; | ||
1867 | default: | ||
1868 | return 0; | ||
1869 | } | ||
1870 | } | ||
1791 | static void | 1871 | static void |
1792 | dump_cstate_pstate_config_info(family, model) | 1872 | dump_cstate_pstate_config_info(family, model) |
1793 | { | 1873 | { |
@@ -1805,6 +1885,9 @@ dump_cstate_pstate_config_info(family, model) | |||
1805 | if (has_nhm_turbo_ratio_limit(family, model)) | 1885 | if (has_nhm_turbo_ratio_limit(family, model)) |
1806 | dump_nhm_turbo_ratio_limits(); | 1886 | dump_nhm_turbo_ratio_limits(); |
1807 | 1887 | ||
1888 | if (has_knl_turbo_ratio_limit(family, model)) | ||
1889 | dump_knl_turbo_ratio_limits(); | ||
1890 | |||
1808 | dump_nhm_cst_cfg(); | 1891 | dump_nhm_cst_cfg(); |
1809 | } | 1892 | } |
1810 | 1893 | ||
@@ -1985,6 +2068,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) | |||
1985 | case 0x3F: /* HSX */ | 2068 | case 0x3F: /* HSX */ |
1986 | case 0x4F: /* BDX */ | 2069 | case 0x4F: /* BDX */ |
1987 | case 0x56: /* BDX-DE */ | 2070 | case 0x56: /* BDX-DE */ |
2071 | case 0x57: /* KNL */ | ||
1988 | return (rapl_dram_energy_units = 15.3 / 1000000); | 2072 | return (rapl_dram_energy_units = 15.3 / 1000000); |
1989 | default: | 2073 | default: |
1990 | return (rapl_energy_units); | 2074 | return (rapl_energy_units); |
@@ -2026,6 +2110,7 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
2026 | case 0x3F: /* HSX */ | 2110 | case 0x3F: /* HSX */ |
2027 | case 0x4F: /* BDX */ | 2111 | case 0x4F: /* BDX */ |
2028 | case 0x56: /* BDX-DE */ | 2112 | case 0x56: /* BDX-DE */ |
2113 | case 0x57: /* KNL */ | ||
2029 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; | 2114 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; |
2030 | break; | 2115 | break; |
2031 | case 0x2D: | 2116 | case 0x2D: |
@@ -2366,6 +2451,17 @@ int is_slm(unsigned int family, unsigned int model) | |||
2366 | return 0; | 2451 | return 0; |
2367 | } | 2452 | } |
2368 | 2453 | ||
2454 | int is_knl(unsigned int family, unsigned int model) | ||
2455 | { | ||
2456 | if (!genuine_intel) | ||
2457 | return 0; | ||
2458 | switch (model) { | ||
2459 | case 0x57: /* KNL */ | ||
2460 | return 1; | ||
2461 | } | ||
2462 | return 0; | ||
2463 | } | ||
2464 | |||
2369 | #define SLM_BCLK_FREQS 5 | 2465 | #define SLM_BCLK_FREQS 5 |
2370 | double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; | 2466 | double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; |
2371 | 2467 | ||
@@ -2576,6 +2672,7 @@ void process_cpuid() | |||
2576 | do_c8_c9_c10 = has_hsw_msrs(family, model); | 2672 | do_c8_c9_c10 = has_hsw_msrs(family, model); |
2577 | do_skl_residency = has_skl_msrs(family, model); | 2673 | do_skl_residency = has_skl_msrs(family, model); |
2578 | do_slm_cstates = is_slm(family, model); | 2674 | do_slm_cstates = is_slm(family, model); |
2675 | do_knl_cstates = is_knl(family, model); | ||
2579 | bclk = discover_bclk(family, model); | 2676 | bclk = discover_bclk(family, model); |
2580 | 2677 | ||
2581 | rapl_probe(family, model); | 2678 | rapl_probe(family, model); |