diff options
| author | Len Brown <len.brown@intel.com> | 2012-03-29 22:19:58 -0400 |
|---|---|---|
| committer | Len Brown <len.brown@intel.com> | 2012-03-29 22:27:19 -0400 |
| commit | 15aaa34654831e98dd76f7738b6c7f5d05a66430 (patch) | |
| tree | d4c4449fdba32e04f791bdc92fbd4042abb18d17 | |
| parent | 88c3281f7ba449992f7a33bd2452a8c6fa5503cb (diff) | |
tools turbostat: harden against cpu online/offline
Sometimes users have turbostat running in interval mode
when they take processors offline/online.
Previously, turbostat would survive, but not gracefully.
Tighten up the error checking so turbostat notices
changes sooner, and print just 1 line on change:
turbostat: re-initialized with num_cpus %d
Signed-off-by: Len Brown <len.brown@intel.com>
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 115 |
1 files changed, 61 insertions, 54 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa60872b9474..ab2f682fd44c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
| @@ -71,7 +71,6 @@ unsigned int show_cpu; | |||
| 71 | int aperf_mperf_unstable; | 71 | int aperf_mperf_unstable; |
| 72 | int backwards_count; | 72 | int backwards_count; |
| 73 | char *progname; | 73 | char *progname; |
| 74 | int need_reinitialize; | ||
| 75 | 74 | ||
| 76 | int num_cpus; | 75 | int num_cpus; |
| 77 | cpu_set_t *cpu_mask; | 76 | cpu_set_t *cpu_mask; |
| @@ -138,30 +137,24 @@ int cpu_migrate(int cpu) | |||
| 138 | return 0; | 137 | return 0; |
| 139 | } | 138 | } |
| 140 | 139 | ||
| 141 | unsigned long long get_msr(int cpu, off_t offset) | 140 | int get_msr(int cpu, off_t offset, unsigned long long *msr) |
| 142 | { | 141 | { |
| 143 | ssize_t retval; | 142 | ssize_t retval; |
| 144 | unsigned long long msr; | ||
| 145 | char pathname[32]; | 143 | char pathname[32]; |
| 146 | int fd; | 144 | int fd; |
| 147 | 145 | ||
| 148 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 146 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
| 149 | fd = open(pathname, O_RDONLY); | 147 | fd = open(pathname, O_RDONLY); |
| 150 | if (fd < 0) { | 148 | if (fd < 0) |
| 151 | perror(pathname); | 149 | return -1; |
| 152 | need_reinitialize = 1; | ||
| 153 | return 0; | ||
| 154 | } | ||
| 155 | |||
| 156 | retval = pread(fd, &msr, sizeof msr, offset); | ||
| 157 | if (retval != sizeof msr) { | ||
| 158 | fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", | ||
| 159 | cpu, offset, retval); | ||
| 160 | exit(-2); | ||
| 161 | } | ||
| 162 | 150 | ||
| 151 | retval = pread(fd, msr, sizeof *msr, offset); | ||
| 163 | close(fd); | 152 | close(fd); |
| 164 | return msr; | 153 | |
| 154 | if (retval != sizeof *msr) | ||
| 155 | return -1; | ||
| 156 | |||
| 157 | return 0; | ||
| 165 | } | 158 | } |
| 166 | 159 | ||
| 167 | void print_header(void) | 160 | void print_header(void) |
| @@ -506,36 +499,51 @@ void compute_average(struct counters *delta, struct counters *avg) | |||
| 506 | free(sum); | 499 | free(sum); |
| 507 | } | 500 | } |
| 508 | 501 | ||
| 509 | void get_counters(struct counters *cnt) | 502 | int get_counters(struct counters *cnt) |
| 510 | { | 503 | { |
| 511 | for ( ; cnt; cnt = cnt->next) { | 504 | for ( ; cnt; cnt = cnt->next) { |
| 512 | if (cpu_migrate(cnt->cpu)) { | 505 | |
| 513 | need_reinitialize = 1; | 506 | if (cpu_migrate(cnt->cpu)) |
| 514 | return; | 507 | return -1; |
| 508 | |||
| 509 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) | ||
| 510 | return -1; | ||
| 511 | |||
| 512 | if (has_aperf) { | ||
| 513 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) | ||
| 514 | return -1; | ||
| 515 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) | ||
| 516 | return -1; | ||
| 517 | } | ||
| 518 | |||
| 519 | if (do_nhm_cstates) { | ||
| 520 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) | ||
| 521 | return -1; | ||
| 522 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | ||
| 523 | return -1; | ||
| 515 | } | 524 | } |
| 516 | 525 | ||
| 517 | cnt->tsc = get_msr(cnt->cpu, MSR_TSC); | ||
| 518 | if (do_nhm_cstates) | ||
| 519 | cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); | ||
| 520 | if (do_nhm_cstates) | ||
| 521 | cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY); | ||
| 522 | if (do_snb_cstates) | ||
| 523 | cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY); | ||
| 524 | if (has_aperf) | ||
| 525 | cnt->aperf = get_msr(cnt->cpu, MSR_APERF); | ||
| 526 | if (has_aperf) | ||
| 527 | cnt->mperf = get_msr(cnt->cpu, MSR_MPERF); | ||
| 528 | if (do_snb_cstates) | ||
| 529 | cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY); | ||
| 530 | if (do_nhm_cstates) | ||
| 531 | cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY); | ||
| 532 | if (do_nhm_cstates) | ||
| 533 | cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY); | ||
| 534 | if (do_snb_cstates) | 526 | if (do_snb_cstates) |
| 535 | cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY); | 527 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) |
| 528 | return -1; | ||
| 529 | |||
| 530 | if (do_nhm_cstates) { | ||
| 531 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | ||
| 532 | return -1; | ||
| 533 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | ||
| 534 | return -1; | ||
| 535 | } | ||
| 536 | if (do_snb_cstates) { | ||
| 537 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | ||
| 538 | return -1; | ||
| 539 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | ||
| 540 | return -1; | ||
| 541 | } | ||
| 536 | if (extra_msr_offset) | 542 | if (extra_msr_offset) |
| 537 | cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset); | 543 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) |
| 544 | return -1; | ||
| 538 | } | 545 | } |
| 546 | return 0; | ||
| 539 | } | 547 | } |
| 540 | 548 | ||
| 541 | void print_nehalem_info(void) | 549 | void print_nehalem_info(void) |
| @@ -546,7 +554,7 @@ void print_nehalem_info(void) | |||
| 546 | if (!do_nehalem_platform_info) | 554 | if (!do_nehalem_platform_info) |
| 547 | return; | 555 | return; |
| 548 | 556 | ||
| 549 | msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); | 557 | get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); |
| 550 | 558 | ||
| 551 | ratio = (msr >> 40) & 0xFF; | 559 | ratio = (msr >> 40) & 0xFF; |
| 552 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 560 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", |
| @@ -562,7 +570,7 @@ void print_nehalem_info(void) | |||
| 562 | if (!do_nehalem_turbo_ratio_limit) | 570 | if (!do_nehalem_turbo_ratio_limit) |
| 563 | return; | 571 | return; |
| 564 | 572 | ||
| 565 | msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); | 573 | get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); |
| 566 | 574 | ||
| 567 | ratio = (msr >> 24) & 0xFF; | 575 | ratio = (msr >> 24) & 0xFF; |
| 568 | if (ratio) | 576 | if (ratio) |
| @@ -755,7 +763,7 @@ int get_core_id(int cpu) | |||
| 755 | } | 763 | } |
| 756 | 764 | ||
| 757 | /* | 765 | /* |
| 758 | * run func(index, cpu) on every cpu in /proc/stat | 766 | * run func(pkg, core, cpu) on every cpu in /proc/stat |
| 759 | */ | 767 | */ |
| 760 | 768 | ||
| 761 | int for_all_cpus(void (func)(int, int, int)) | 769 | int for_all_cpus(void (func)(int, int, int)) |
| @@ -791,20 +799,18 @@ int for_all_cpus(void (func)(int, int, int)) | |||
| 791 | 799 | ||
| 792 | void re_initialize(void) | 800 | void re_initialize(void) |
| 793 | { | 801 | { |
| 794 | printf("turbostat: topology changed, re-initializing.\n"); | ||
| 795 | free_all_counters(); | 802 | free_all_counters(); |
| 796 | num_cpus = for_all_cpus(alloc_new_counters); | 803 | num_cpus = for_all_cpus(alloc_new_counters); |
| 797 | need_reinitialize = 0; | ||
| 798 | cpu_mask_uninit(); | 804 | cpu_mask_uninit(); |
| 799 | cpu_mask_init(num_cpus); | 805 | cpu_mask_init(num_cpus); |
| 800 | printf("num_cpus is now %d\n", num_cpus); | 806 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); |
| 801 | } | 807 | } |
| 802 | 808 | ||
| 803 | void dummy(int pkg, int core, int cpu) { return; } | 809 | void dummy(int pkg, int core, int cpu) { return; } |
| 804 | /* | 810 | /* |
| 805 | * check to see if a cpu came on-line | 811 | * check to see if a cpu came on-line |
| 806 | */ | 812 | */ |
| 807 | void verify_num_cpus(void) | 813 | int verify_num_cpus(void) |
| 808 | { | 814 | { |
| 809 | int new_num_cpus; | 815 | int new_num_cpus; |
| 810 | 816 | ||
| @@ -814,8 +820,9 @@ void verify_num_cpus(void) | |||
| 814 | if (verbose) | 820 | if (verbose) |
| 815 | printf("num_cpus was %d, is now %d\n", | 821 | printf("num_cpus was %d, is now %d\n", |
| 816 | num_cpus, new_num_cpus); | 822 | num_cpus, new_num_cpus); |
| 817 | need_reinitialize = 1; | 823 | return -1; |
| 818 | } | 824 | } |
| 825 | return 0; | ||
| 819 | } | 826 | } |
| 820 | 827 | ||
| 821 | void turbostat_loop() | 828 | void turbostat_loop() |
| @@ -825,25 +832,25 @@ restart: | |||
| 825 | gettimeofday(&tv_even, (struct timezone *)NULL); | 832 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 826 | 833 | ||
| 827 | while (1) { | 834 | while (1) { |
| 828 | verify_num_cpus(); | 835 | if (verify_num_cpus()) { |
| 829 | if (need_reinitialize) { | ||
| 830 | re_initialize(); | 836 | re_initialize(); |
| 831 | goto restart; | 837 | goto restart; |
| 832 | } | 838 | } |
| 833 | sleep(interval_sec); | 839 | sleep(interval_sec); |
| 834 | get_counters(cnt_odd); | 840 | if (get_counters(cnt_odd)) { |
| 841 | re_initialize(); | ||
| 842 | goto restart; | ||
| 843 | } | ||
| 835 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 844 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
| 836 | |||
| 837 | compute_delta(cnt_odd, cnt_even, cnt_delta); | 845 | compute_delta(cnt_odd, cnt_even, cnt_delta); |
| 838 | timersub(&tv_odd, &tv_even, &tv_delta); | 846 | timersub(&tv_odd, &tv_even, &tv_delta); |
| 839 | compute_average(cnt_delta, cnt_average); | 847 | compute_average(cnt_delta, cnt_average); |
| 840 | print_counters(cnt_delta); | 848 | print_counters(cnt_delta); |
| 841 | if (need_reinitialize) { | 849 | sleep(interval_sec); |
| 850 | if (get_counters(cnt_even)) { | ||
| 842 | re_initialize(); | 851 | re_initialize(); |
| 843 | goto restart; | 852 | goto restart; |
| 844 | } | 853 | } |
| 845 | sleep(interval_sec); | ||
| 846 | get_counters(cnt_even); | ||
| 847 | gettimeofday(&tv_even, (struct timezone *)NULL); | 854 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 848 | compute_delta(cnt_even, cnt_odd, cnt_delta); | 855 | compute_delta(cnt_even, cnt_odd, cnt_delta); |
| 849 | timersub(&tv_even, &tv_odd, &tv_delta); | 856 | timersub(&tv_even, &tv_odd, &tv_delta); |
