diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-05-31 14:39:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-05-31 14:39:25 -0400 |
commit | 50f5a1ee32ba2dd8f7221e4d841249edf81b9075 (patch) | |
tree | c5962d0c67a692793f893e69153ab0161faa862a /tools | |
parent | dae8f283bf30738593f6d2a4623945c5e6d7794e (diff) | |
parent | a68c7c3ff0469d79993ee85e8e0a3a9a568ce350 (diff) |
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat tool fixes from Len Brown:
"Just one minor kernel dependency in this batch -- added a #define to
msr-index.h"
* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
tools/power turbostat: update version number to 4.7
tools/power turbostat: allow running without cpu0
tools/power turbostat: correctly decode of ENERGY_PERFORMANCE_BIAS
tools/power turbostat: enable turbostat to support Knights Landing (KNL)
tools/power turbostat: correctly display more than 2 threads/core
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 224 |
1 files changed, 184 insertions, 40 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bac98ca3d4ca..323b65edfc97 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -52,6 +52,7 @@ unsigned int skip_c0; | |||
52 | unsigned int skip_c1; | 52 | unsigned int skip_c1; |
53 | unsigned int do_nhm_cstates; | 53 | unsigned int do_nhm_cstates; |
54 | unsigned int do_snb_cstates; | 54 | unsigned int do_snb_cstates; |
55 | unsigned int do_knl_cstates; | ||
55 | unsigned int do_pc2; | 56 | unsigned int do_pc2; |
56 | unsigned int do_pc3; | 57 | unsigned int do_pc3; |
57 | unsigned int do_pc6; | 58 | unsigned int do_pc6; |
@@ -91,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons; | |||
91 | unsigned int do_ring_perf_limit_reasons; | 92 | unsigned int do_ring_perf_limit_reasons; |
92 | unsigned int crystal_hz; | 93 | unsigned int crystal_hz; |
93 | unsigned long long tsc_hz; | 94 | unsigned long long tsc_hz; |
95 | int base_cpu; | ||
94 | 96 | ||
95 | #define RAPL_PKG (1 << 0) | 97 | #define RAPL_PKG (1 << 0) |
96 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 98 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
@@ -316,7 +318,7 @@ void print_header(void) | |||
316 | 318 | ||
317 | if (do_nhm_cstates) | 319 | if (do_nhm_cstates) |
318 | outp += sprintf(outp, " CPU%%c1"); | 320 | outp += sprintf(outp, " CPU%%c1"); |
319 | if (do_nhm_cstates && !do_slm_cstates) | 321 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) |
320 | outp += sprintf(outp, " CPU%%c3"); | 322 | outp += sprintf(outp, " CPU%%c3"); |
321 | if (do_nhm_cstates) | 323 | if (do_nhm_cstates) |
322 | outp += sprintf(outp, " CPU%%c6"); | 324 | outp += sprintf(outp, " CPU%%c6"); |
@@ -546,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
546 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 548 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
547 | goto done; | 549 | goto done; |
548 | 550 | ||
549 | if (do_nhm_cstates && !do_slm_cstates) | 551 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) |
550 | outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); | 552 | outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); |
551 | if (do_nhm_cstates) | 553 | if (do_nhm_cstates) |
552 | outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); | 554 | outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); |
@@ -1018,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1018 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 1020 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
1019 | return 0; | 1021 | return 0; |
1020 | 1022 | ||
1021 | if (do_nhm_cstates && !do_slm_cstates) { | 1023 | if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { |
1022 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | 1024 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) |
1023 | return -6; | 1025 | return -6; |
1024 | } | 1026 | } |
1025 | 1027 | ||
1026 | if (do_nhm_cstates) { | 1028 | if (do_nhm_cstates && !do_knl_cstates) { |
1027 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | 1029 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) |
1028 | return -7; | 1030 | return -7; |
1031 | } else if (do_knl_cstates) { | ||
1032 | if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) | ||
1033 | return -7; | ||
1029 | } | 1034 | } |
1030 | 1035 | ||
1031 | if (do_snb_cstates) | 1036 | if (do_snb_cstates) |
@@ -1150,7 +1155,7 @@ dump_nhm_platform_info(void) | |||
1150 | unsigned long long msr; | 1155 | unsigned long long msr; |
1151 | unsigned int ratio; | 1156 | unsigned int ratio; |
1152 | 1157 | ||
1153 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); | 1158 | get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); |
1154 | 1159 | ||
1155 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); | 1160 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); |
1156 | 1161 | ||
@@ -1162,7 +1167,7 @@ dump_nhm_platform_info(void) | |||
1162 | fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", | 1167 | fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", |
1163 | ratio, bclk, ratio * bclk); | 1168 | ratio, bclk, ratio * bclk); |
1164 | 1169 | ||
1165 | get_msr(0, MSR_IA32_POWER_CTL, &msr); | 1170 | get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); |
1166 | fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", | 1171 | fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", |
1167 | msr, msr & 0x2 ? "EN" : "DIS"); | 1172 | msr, msr & 0x2 ? "EN" : "DIS"); |
1168 | 1173 | ||
@@ -1175,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void) | |||
1175 | unsigned long long msr; | 1180 | unsigned long long msr; |
1176 | unsigned int ratio; | 1181 | unsigned int ratio; |
1177 | 1182 | ||
1178 | get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); | 1183 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); |
1179 | 1184 | ||
1180 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); | 1185 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); |
1181 | 1186 | ||
@@ -1197,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void) | |||
1197 | unsigned long long msr; | 1202 | unsigned long long msr; |
1198 | unsigned int ratio; | 1203 | unsigned int ratio; |
1199 | 1204 | ||
1200 | get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); | 1205 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); |
1201 | 1206 | ||
1202 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); | 1207 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); |
1203 | 1208 | ||
@@ -1249,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void) | |||
1249 | unsigned long long msr; | 1254 | unsigned long long msr; |
1250 | unsigned int ratio; | 1255 | unsigned int ratio; |
1251 | 1256 | ||
1252 | get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); | 1257 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); |
1253 | 1258 | ||
1254 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); | 1259 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
1255 | 1260 | ||
@@ -1296,11 +1301,72 @@ dump_nhm_turbo_ratio_limits(void) | |||
1296 | } | 1301 | } |
1297 | 1302 | ||
1298 | static void | 1303 | static void |
1304 | dump_knl_turbo_ratio_limits(void) | ||
1305 | { | ||
1306 | int cores; | ||
1307 | unsigned int ratio; | ||
1308 | unsigned long long msr; | ||
1309 | int delta_cores; | ||
1310 | int delta_ratio; | ||
1311 | int i; | ||
1312 | |||
1313 | get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | ||
1314 | |||
1315 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", | ||
1316 | msr); | ||
1317 | |||
1318 | /** | ||
1319 | * Turbo encoding in KNL is as follows: | ||
1320 | * [7:0] -- Base value of number of active cores of bucket 1. | ||
1321 | * [15:8] -- Base value of freq ratio of bucket 1. | ||
1322 | * [20:16] -- +ve delta of number of active cores of bucket 2. | ||
1323 | * i.e. active cores of bucket 2 = | ||
1324 | * active cores of bucket 1 + delta | ||
1325 | * [23:21] -- Negative delta of freq ratio of bucket 2. | ||
1326 | * i.e. freq ratio of bucket 2 = | ||
1327 | * freq ratio of bucket 1 - delta | ||
1328 | * [28:24]-- +ve delta of number of active cores of bucket 3. | ||
1329 | * [31:29]-- -ve delta of freq ratio of bucket 3. | ||
1330 | * [36:32]-- +ve delta of number of active cores of bucket 4. | ||
1331 | * [39:37]-- -ve delta of freq ratio of bucket 4. | ||
1332 | * [44:40]-- +ve delta of number of active cores of bucket 5. | ||
1333 | * [47:45]-- -ve delta of freq ratio of bucket 5. | ||
1334 | * [52:48]-- +ve delta of number of active cores of bucket 6. | ||
1335 | * [55:53]-- -ve delta of freq ratio of bucket 6. | ||
1336 | * [60:56]-- +ve delta of number of active cores of bucket 7. | ||
1337 | * [63:61]-- -ve delta of freq ratio of bucket 7. | ||
1338 | */ | ||
1339 | cores = msr & 0xFF; | ||
1340 | ratio = (msr >> 8) && 0xFF; | ||
1341 | if (ratio > 0) | ||
1342 | fprintf(stderr, | ||
1343 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1344 | ratio, bclk, ratio * bclk, cores); | ||
1345 | |||
1346 | for (i = 16; i < 64; i = i + 8) { | ||
1347 | delta_cores = (msr >> i) & 0x1F; | ||
1348 | delta_ratio = (msr >> (i + 5)) && 0x7; | ||
1349 | if (!delta_cores || !delta_ratio) | ||
1350 | return; | ||
1351 | cores = cores + delta_cores; | ||
1352 | ratio = ratio - delta_ratio; | ||
1353 | |||
1354 | /** -ve ratios will make successive ratio calculations | ||
1355 | * negative. Hence return instead of carrying on. | ||
1356 | */ | ||
1357 | if (ratio > 0) | ||
1358 | fprintf(stderr, | ||
1359 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1360 | ratio, bclk, ratio * bclk, cores); | ||
1361 | } | ||
1362 | } | ||
1363 | |||
1364 | static void | ||
1299 | dump_nhm_cst_cfg(void) | 1365 | dump_nhm_cst_cfg(void) |
1300 | { | 1366 | { |
1301 | unsigned long long msr; | 1367 | unsigned long long msr; |
1302 | 1368 | ||
1303 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | 1369 | get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); |
1304 | 1370 | ||
1305 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | 1371 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) |
1306 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | 1372 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
@@ -1381,12 +1447,41 @@ int parse_int_file(const char *fmt, ...) | |||
1381 | } | 1447 | } |
1382 | 1448 | ||
1383 | /* | 1449 | /* |
1384 | * cpu_is_first_sibling_in_core(cpu) | 1450 | * get_cpu_position_in_core(cpu) |
1385 | * return 1 if given CPU is 1st HT sibling in the core | 1451 | * return the position of the CPU among its HT siblings in the core |
1452 | * return -1 if the sibling is not in list | ||
1386 | */ | 1453 | */ |
1387 | int cpu_is_first_sibling_in_core(int cpu) | 1454 | int get_cpu_position_in_core(int cpu) |
1388 | { | 1455 | { |
1389 | return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | 1456 | char path[64]; |
1457 | FILE *filep; | ||
1458 | int this_cpu; | ||
1459 | char character; | ||
1460 | int i; | ||
1461 | |||
1462 | sprintf(path, | ||
1463 | "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", | ||
1464 | cpu); | ||
1465 | filep = fopen(path, "r"); | ||
1466 | if (filep == NULL) { | ||
1467 | perror(path); | ||
1468 | exit(1); | ||
1469 | } | ||
1470 | |||
1471 | for (i = 0; i < topo.num_threads_per_core; i++) { | ||
1472 | fscanf(filep, "%d", &this_cpu); | ||
1473 | if (this_cpu == cpu) { | ||
1474 | fclose(filep); | ||
1475 | return i; | ||
1476 | } | ||
1477 | |||
1478 | /* Account for no separator after last thread*/ | ||
1479 | if (i != (topo.num_threads_per_core - 1)) | ||
1480 | fscanf(filep, "%c", &character); | ||
1481 | } | ||
1482 | |||
1483 | fclose(filep); | ||
1484 | return -1; | ||
1390 | } | 1485 | } |
1391 | 1486 | ||
1392 | /* | 1487 | /* |
@@ -1412,25 +1507,31 @@ int get_num_ht_siblings(int cpu) | |||
1412 | { | 1507 | { |
1413 | char path[80]; | 1508 | char path[80]; |
1414 | FILE *filep; | 1509 | FILE *filep; |
1415 | int sib1, sib2; | 1510 | int sib1; |
1416 | int matches; | 1511 | int matches = 0; |
1417 | char character; | 1512 | char character; |
1513 | char str[100]; | ||
1514 | char *ch; | ||
1418 | 1515 | ||
1419 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | 1516 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); |
1420 | filep = fopen_or_die(path, "r"); | 1517 | filep = fopen_or_die(path, "r"); |
1518 | |||
1421 | /* | 1519 | /* |
1422 | * file format: | 1520 | * file format: |
1423 | * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) | 1521 | * A ',' separated or '-' separated set of numbers |
1424 | * otherwinse 1 sibling (self). | 1522 | * (eg 1-2 or 1,3,4,5) |
1425 | */ | 1523 | */ |
1426 | matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); | 1524 | fscanf(filep, "%d%c\n", &sib1, &character); |
1525 | fseek(filep, 0, SEEK_SET); | ||
1526 | fgets(str, 100, filep); | ||
1527 | ch = strchr(str, character); | ||
1528 | while (ch != NULL) { | ||
1529 | matches++; | ||
1530 | ch = strchr(ch+1, character); | ||
1531 | } | ||
1427 | 1532 | ||
1428 | fclose(filep); | 1533 | fclose(filep); |
1429 | 1534 | return matches+1; | |
1430 | if (matches == 3) | ||
1431 | return 2; | ||
1432 | else | ||
1433 | return 1; | ||
1434 | } | 1535 | } |
1435 | 1536 | ||
1436 | /* | 1537 | /* |
@@ -1594,8 +1695,10 @@ restart: | |||
1594 | void check_dev_msr() | 1695 | void check_dev_msr() |
1595 | { | 1696 | { |
1596 | struct stat sb; | 1697 | struct stat sb; |
1698 | char pathname[32]; | ||
1597 | 1699 | ||
1598 | if (stat("/dev/cpu/0/msr", &sb)) | 1700 | sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); |
1701 | if (stat(pathname, &sb)) | ||
1599 | if (system("/sbin/modprobe msr > /dev/null 2>&1")) | 1702 | if (system("/sbin/modprobe msr > /dev/null 2>&1")) |
1600 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); | 1703 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); |
1601 | } | 1704 | } |
@@ -1608,6 +1711,7 @@ void check_permissions() | |||
1608 | cap_user_data_t cap_data = &cap_data_data; | 1711 | cap_user_data_t cap_data = &cap_data_data; |
1609 | extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); | 1712 | extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); |
1610 | int do_exit = 0; | 1713 | int do_exit = 0; |
1714 | char pathname[32]; | ||
1611 | 1715 | ||
1612 | /* check for CAP_SYS_RAWIO */ | 1716 | /* check for CAP_SYS_RAWIO */ |
1613 | cap_header->pid = getpid(); | 1717 | cap_header->pid = getpid(); |
@@ -1622,7 +1726,8 @@ void check_permissions() | |||
1622 | } | 1726 | } |
1623 | 1727 | ||
1624 | /* test file permissions */ | 1728 | /* test file permissions */ |
1625 | if (euidaccess("/dev/cpu/0/msr", R_OK)) { | 1729 | sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); |
1730 | if (euidaccess(pathname, R_OK)) { | ||
1626 | do_exit++; | 1731 | do_exit++; |
1627 | warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); | 1732 | warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); |
1628 | } | 1733 | } |
@@ -1704,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | |||
1704 | default: | 1809 | default: |
1705 | return 0; | 1810 | return 0; |
1706 | } | 1811 | } |
1707 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | 1812 | get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); |
1708 | 1813 | ||
1709 | pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; | 1814 | pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; |
1710 | 1815 | ||
@@ -1753,6 +1858,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1753 | } | 1858 | } |
1754 | } | 1859 | } |
1755 | 1860 | ||
1861 | int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) | ||
1862 | { | ||
1863 | if (!genuine_intel) | ||
1864 | return 0; | ||
1865 | |||
1866 | if (family != 6) | ||
1867 | return 0; | ||
1868 | |||
1869 | switch (model) { | ||
1870 | case 0x57: /* Knights Landing */ | ||
1871 | return 1; | ||
1872 | default: | ||
1873 | return 0; | ||
1874 | } | ||
1875 | } | ||
1756 | static void | 1876 | static void |
1757 | dump_cstate_pstate_config_info(family, model) | 1877 | dump_cstate_pstate_config_info(family, model) |
1758 | { | 1878 | { |
@@ -1770,6 +1890,9 @@ dump_cstate_pstate_config_info(family, model) | |||
1770 | if (has_nhm_turbo_ratio_limit(family, model)) | 1890 | if (has_nhm_turbo_ratio_limit(family, model)) |
1771 | dump_nhm_turbo_ratio_limits(); | 1891 | dump_nhm_turbo_ratio_limits(); |
1772 | 1892 | ||
1893 | if (has_knl_turbo_ratio_limit(family, model)) | ||
1894 | dump_knl_turbo_ratio_limits(); | ||
1895 | |||
1773 | dump_nhm_cst_cfg(); | 1896 | dump_nhm_cst_cfg(); |
1774 | } | 1897 | } |
1775 | 1898 | ||
@@ -1801,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1801 | if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) | 1924 | if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) |
1802 | return 0; | 1925 | return 0; |
1803 | 1926 | ||
1804 | switch (msr & 0x7) { | 1927 | switch (msr & 0xF) { |
1805 | case ENERGY_PERF_BIAS_PERFORMANCE: | 1928 | case ENERGY_PERF_BIAS_PERFORMANCE: |
1806 | epb_string = "performance"; | 1929 | epb_string = "performance"; |
1807 | break; | 1930 | break; |
@@ -1925,7 +2048,7 @@ double get_tdp(model) | |||
1925 | unsigned long long msr; | 2048 | unsigned long long msr; |
1926 | 2049 | ||
1927 | if (do_rapl & RAPL_PKG_POWER_INFO) | 2050 | if (do_rapl & RAPL_PKG_POWER_INFO) |
1928 | if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) | 2051 | if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) |
1929 | return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; | 2052 | return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; |
1930 | 2053 | ||
1931 | switch (model) { | 2054 | switch (model) { |
@@ -1950,6 +2073,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) | |||
1950 | case 0x3F: /* HSX */ | 2073 | case 0x3F: /* HSX */ |
1951 | case 0x4F: /* BDX */ | 2074 | case 0x4F: /* BDX */ |
1952 | case 0x56: /* BDX-DE */ | 2075 | case 0x56: /* BDX-DE */ |
2076 | case 0x57: /* KNL */ | ||
1953 | return (rapl_dram_energy_units = 15.3 / 1000000); | 2077 | return (rapl_dram_energy_units = 15.3 / 1000000); |
1954 | default: | 2078 | default: |
1955 | return (rapl_energy_units); | 2079 | return (rapl_energy_units); |
@@ -1991,6 +2115,7 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
1991 | case 0x3F: /* HSX */ | 2115 | case 0x3F: /* HSX */ |
1992 | case 0x4F: /* BDX */ | 2116 | case 0x4F: /* BDX */ |
1993 | case 0x56: /* BDX-DE */ | 2117 | case 0x56: /* BDX-DE */ |
2118 | case 0x57: /* KNL */ | ||
1994 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; | 2119 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; |
1995 | break; | 2120 | break; |
1996 | case 0x2D: | 2121 | case 0x2D: |
@@ -2006,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
2006 | } | 2131 | } |
2007 | 2132 | ||
2008 | /* units on package 0, verify later other packages match */ | 2133 | /* units on package 0, verify later other packages match */ |
2009 | if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) | 2134 | if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) |
2010 | return; | 2135 | return; |
2011 | 2136 | ||
2012 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); | 2137 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); |
@@ -2331,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model) | |||
2331 | return 0; | 2456 | return 0; |
2332 | } | 2457 | } |
2333 | 2458 | ||
2459 | int is_knl(unsigned int family, unsigned int model) | ||
2460 | { | ||
2461 | if (!genuine_intel) | ||
2462 | return 0; | ||
2463 | switch (model) { | ||
2464 | case 0x57: /* KNL */ | ||
2465 | return 1; | ||
2466 | } | ||
2467 | return 0; | ||
2468 | } | ||
2469 | |||
2334 | #define SLM_BCLK_FREQS 5 | 2470 | #define SLM_BCLK_FREQS 5 |
2335 | double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; | 2471 | double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; |
2336 | 2472 | ||
@@ -2340,7 +2476,7 @@ double slm_bclk(void) | |||
2340 | unsigned int i; | 2476 | unsigned int i; |
2341 | double freq; | 2477 | double freq; |
2342 | 2478 | ||
2343 | if (get_msr(0, MSR_FSB_FREQ, &msr)) | 2479 | if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) |
2344 | fprintf(stderr, "SLM BCLK: unknown\n"); | 2480 | fprintf(stderr, "SLM BCLK: unknown\n"); |
2345 | 2481 | ||
2346 | i = msr & 0xf; | 2482 | i = msr & 0xf; |
@@ -2408,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
2408 | if (!do_nhm_platform_info) | 2544 | if (!do_nhm_platform_info) |
2409 | goto guess; | 2545 | goto guess; |
2410 | 2546 | ||
2411 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | 2547 | if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) |
2412 | goto guess; | 2548 | goto guess; |
2413 | 2549 | ||
2414 | target_c_local = (msr >> 16) & 0xFF; | 2550 | target_c_local = (msr >> 16) & 0xFF; |
@@ -2541,6 +2677,7 @@ void process_cpuid() | |||
2541 | do_c8_c9_c10 = has_hsw_msrs(family, model); | 2677 | do_c8_c9_c10 = has_hsw_msrs(family, model); |
2542 | do_skl_residency = has_skl_msrs(family, model); | 2678 | do_skl_residency = has_skl_msrs(family, model); |
2543 | do_slm_cstates = is_slm(family, model); | 2679 | do_slm_cstates = is_slm(family, model); |
2680 | do_knl_cstates = is_knl(family, model); | ||
2544 | bclk = discover_bclk(family, model); | 2681 | bclk = discover_bclk(family, model); |
2545 | 2682 | ||
2546 | rapl_probe(family, model); | 2683 | rapl_probe(family, model); |
@@ -2755,13 +2892,9 @@ int initialize_counters(int cpu_id) | |||
2755 | 2892 | ||
2756 | my_package_id = get_physical_package_id(cpu_id); | 2893 | my_package_id = get_physical_package_id(cpu_id); |
2757 | my_core_id = get_core_id(cpu_id); | 2894 | my_core_id = get_core_id(cpu_id); |
2758 | 2895 | my_thread_id = get_cpu_position_in_core(cpu_id); | |
2759 | if (cpu_is_first_sibling_in_core(cpu_id)) { | 2896 | if (!my_thread_id) |
2760 | my_thread_id = 0; | ||
2761 | topo.num_cores++; | 2897 | topo.num_cores++; |
2762 | } else { | ||
2763 | my_thread_id = 1; | ||
2764 | } | ||
2765 | 2898 | ||
2766 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | 2899 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); |
2767 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | 2900 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); |
@@ -2785,13 +2918,24 @@ void setup_all_buffers(void) | |||
2785 | for_all_proc_cpus(initialize_counters); | 2918 | for_all_proc_cpus(initialize_counters); |
2786 | } | 2919 | } |
2787 | 2920 | ||
2921 | void set_base_cpu(void) | ||
2922 | { | ||
2923 | base_cpu = sched_getcpu(); | ||
2924 | if (base_cpu < 0) | ||
2925 | err(-ENODEV, "No valid cpus found"); | ||
2926 | |||
2927 | if (debug > 1) | ||
2928 | fprintf(stderr, "base_cpu = %d\n", base_cpu); | ||
2929 | } | ||
2930 | |||
2788 | void turbostat_init() | 2931 | void turbostat_init() |
2789 | { | 2932 | { |
2933 | setup_all_buffers(); | ||
2934 | set_base_cpu(); | ||
2790 | check_dev_msr(); | 2935 | check_dev_msr(); |
2791 | check_permissions(); | 2936 | check_permissions(); |
2792 | process_cpuid(); | 2937 | process_cpuid(); |
2793 | 2938 | ||
2794 | setup_all_buffers(); | ||
2795 | 2939 | ||
2796 | if (debug) | 2940 | if (debug) |
2797 | for_all_cpus(print_epb, ODD_COUNTERS); | 2941 | for_all_cpus(print_epb, ODD_COUNTERS); |
@@ -2870,7 +3014,7 @@ int get_and_dump_counters(void) | |||
2870 | } | 3014 | } |
2871 | 3015 | ||
2872 | void print_version() { | 3016 | void print_version() { |
2873 | fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" | 3017 | fprintf(stderr, "turbostat version 4.7 27-May, 2015" |
2874 | " - Len Brown <lenb@kernel.org>\n"); | 3018 | " - Len Brown <lenb@kernel.org>\n"); |
2875 | } | 3019 | } |
2876 | 3020 | ||