diff options
| -rw-r--r-- | tools/power/x86/turbostat/Makefile | 1 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 77 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1329 |
3 files changed, 868 insertions, 539 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index fd8e1f1297aa..f85649554191 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | turbostat : turbostat.c | 1 | turbostat : turbostat.c |
| 2 | CFLAGS += -Wall | ||
| 2 | 3 | ||
| 3 | clean : | 4 | clean : |
| 4 | rm -f turbostat | 5 | rm -f turbostat |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index adf175f61496..74e44507dfe9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
| @@ -27,7 +27,11 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. | |||
| 27 | on processors that additionally support C-state residency counters. | 27 | on processors that additionally support C-state residency counters. |
| 28 | 28 | ||
| 29 | .SS Options | 29 | .SS Options |
| 30 | The \fB-s\fP option prints only a 1-line summary for each sample interval. | 30 | The \fB-s\fP option limits output to a 1-line system summary for each interval. |
| 31 | .PP | ||
| 32 | The \fB-c\fP option limits output to the 1st thread in each core. | ||
| 33 | .PP | ||
| 34 | The \fB-p\fP option limits output to the 1st thread in each package. | ||
| 31 | .PP | 35 | .PP |
| 32 | The \fB-v\fP option increases verbosity. | 36 | The \fB-v\fP option increases verbosity. |
| 33 | .PP | 37 | .PP |
| @@ -65,19 +69,19 @@ Subsequent rows show per-CPU statistics. | |||
| 65 | .nf | 69 | .nf |
| 66 | [root@x980]# ./turbostat | 70 | [root@x980]# ./turbostat |
| 67 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 71 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
| 68 | 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 | 72 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 |
| 69 | 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 | 73 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 |
| 70 | 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 | 74 | 0 6 0.05 1.62 3.38 10.34 |
| 71 | 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 | 75 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 |
| 72 | 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 | 76 | 1 8 0.03 1.62 3.38 0.06 |
| 73 | 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 | 77 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 |
| 74 | 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 | 78 | 2 10 0.02 1.62 3.38 0.29 |
| 75 | 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 | 79 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 |
| 76 | 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 | 80 | 8 7 0.01 1.62 3.38 0.06 |
| 77 | 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 | 81 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 |
| 78 | 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 | 82 | 9 9 0.02 1.62 3.38 0.60 |
| 79 | 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 | 83 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 |
| 80 | 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 | 84 | 10 11 0.02 1.62 3.38 0.02 |
| 81 | .fi | 85 | .fi |
| 82 | .SH SUMMARY EXAMPLE | 86 | .SH SUMMARY EXAMPLE |
| 83 | The "-s" option prints the column headers just once, | 87 | The "-s" option prints the column headers just once, |
| @@ -86,9 +90,10 @@ and then the one line system summary for each sample interval. | |||
| 86 | .nf | 90 | .nf |
| 87 | [root@x980]# ./turbostat -s | 91 | [root@x980]# ./turbostat -s |
| 88 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 92 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
| 89 | 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 | 93 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 |
| 90 | 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 | 94 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 |
| 91 | 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 | 95 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 |
| 96 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | ||
| 92 | .fi | 97 | .fi |
| 93 | .SH VERBOSE EXAMPLE | 98 | .SH VERBOSE EXAMPLE |
| 94 | The "-v" option adds verbosity to the output: | 99 | The "-v" option adds verbosity to the output: |
| @@ -120,30 +125,28 @@ until ^C while the other CPUs are mostly idle: | |||
| 120 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null | 125 | [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null |
| 121 | ^C | 126 | ^C |
| 122 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 127 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 |
| 123 | 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 | 128 | 8.86 3.61 3.38 15.06 31.19 44.89 0.00 0.00 |
| 124 | 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 | 129 | 0 0 1.46 3.22 3.38 16.84 29.48 52.22 0.00 0.00 |
| 125 | 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 | 130 | 0 6 0.21 3.06 3.38 18.09 |
| 126 | 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 | 131 | 1 2 0.53 3.33 3.38 2.80 46.40 50.27 |
| 127 | 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 | 132 | 1 8 0.89 3.47 3.38 2.44 |
| 128 | 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 | 133 | 2 4 1.36 3.43 3.38 9.04 23.71 65.89 |
| 129 | 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 | 134 | 2 10 0.18 2.86 3.38 10.22 |
| 130 | 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 | 135 | 8 1 0.04 2.87 3.38 99.96 0.01 0.00 |
| 131 | 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 | 136 | 8 7 99.72 3.63 3.38 0.27 |
| 132 | 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 | 137 | 9 3 0.31 3.21 3.38 7.64 56.55 35.50 |
| 133 | 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 | 138 | 9 9 0.08 2.95 3.38 7.88 |
| 134 | 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 | 139 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
| 135 | 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 | 140 | 10 11 0.16 2.88 3.38 3.40 |
| 136 | 4.907015 sec | ||
| 137 | |||
| 138 | .fi | 141 | .fi |
| 139 | Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit | 142 | Above the cycle soaker drives cpu7 up to its 3.6 GHz turbo limit |
| 140 | while the other processors are generally in various states of idle. | 143 | while the other processors are generally in various states of idle. |
| 141 | 144 | ||
| 142 | Note that cpu0 is an HT sibling sharing core0 | 145 | Note that cpu1 and cpu7 are HT siblings within core8. |
| 143 | with cpu6, and thus it is unable to get to an idle state | 146 | As cpu7 is very busy, it prevents its sibling, cpu1, |
| 144 | deeper than c1 while cpu6 is busy. | 147 | from entering a c-state deeper than c1. |
| 145 | 148 | ||
| 146 | Note that turbostat reports average GHz of 3.64, while | 149 | Note that turbostat reports average GHz of 3.63, while |
| 147 | the arithmetic average of the GHz column above is lower. | 150 | the arithmetic average of the GHz column above is lower. |
| 148 | This is a weighted average, where the weight is %c0. i.e., it is the total number of | 151 | This is a weighted average, where the weight is %c0. i.e., it is the total number of |
| 149 | un-halted cycles elapsed per time divided by the number of CPUs. | 152 | un-halted cycles elapsed per time divided by the number of CPUs. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 16de7ad4850f..b815a12159b2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
| @@ -67,92 +67,119 @@ double bclk; | |||
| 67 | unsigned int show_pkg; | 67 | unsigned int show_pkg; |
| 68 | unsigned int show_core; | 68 | unsigned int show_core; |
| 69 | unsigned int show_cpu; | 69 | unsigned int show_cpu; |
| 70 | unsigned int show_pkg_only; | ||
| 71 | unsigned int show_core_only; | ||
| 72 | char *output_buffer, *outp; | ||
| 70 | 73 | ||
| 71 | int aperf_mperf_unstable; | 74 | int aperf_mperf_unstable; |
| 72 | int backwards_count; | 75 | int backwards_count; |
| 73 | char *progname; | 76 | char *progname; |
| 74 | 77 | ||
| 75 | int num_cpus; | 78 | cpu_set_t *cpu_present_set, *cpu_affinity_set; |
| 76 | cpu_set_t *cpu_present_set, *cpu_mask; | 79 | size_t cpu_present_setsize, cpu_affinity_setsize; |
| 77 | size_t cpu_present_setsize, cpu_mask_size; | 80 | |
| 78 | 81 | struct thread_data { | |
| 79 | struct counters { | 82 | unsigned long long tsc; |
| 80 | unsigned long long tsc; /* per thread */ | 83 | unsigned long long aperf; |
| 81 | unsigned long long aperf; /* per thread */ | 84 | unsigned long long mperf; |
| 82 | unsigned long long mperf; /* per thread */ | 85 | unsigned long long c1; /* derived */ |
| 83 | unsigned long long c1; /* per thread (calculated) */ | 86 | unsigned long long extra_msr; |
| 84 | unsigned long long c3; /* per core */ | 87 | unsigned int cpu_id; |
| 85 | unsigned long long c6; /* per core */ | 88 | unsigned int flags; |
| 86 | unsigned long long c7; /* per core */ | 89 | #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 |
| 87 | unsigned long long pc2; /* per package */ | 90 | #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 |
| 88 | unsigned long long pc3; /* per package */ | 91 | } *thread_even, *thread_odd; |
| 89 | unsigned long long pc6; /* per package */ | 92 | |
| 90 | unsigned long long pc7; /* per package */ | 93 | struct core_data { |
| 91 | unsigned long long extra_msr; /* per thread */ | 94 | unsigned long long c3; |
| 92 | int pkg; | 95 | unsigned long long c6; |
| 93 | int core; | 96 | unsigned long long c7; |
| 94 | int cpu; | 97 | unsigned int core_id; |
| 95 | struct counters *next; | 98 | } *core_even, *core_odd; |
| 96 | }; | 99 | |
| 97 | 100 | struct pkg_data { | |
| 98 | struct counters *cnt_even; | 101 | unsigned long long pc2; |
| 99 | struct counters *cnt_odd; | 102 | unsigned long long pc3; |
| 100 | struct counters *cnt_delta; | 103 | unsigned long long pc6; |
| 101 | struct counters *cnt_average; | 104 | unsigned long long pc7; |
| 102 | struct timeval tv_even; | 105 | unsigned int package_id; |
| 103 | struct timeval tv_odd; | 106 | } *package_even, *package_odd; |
| 104 | struct timeval tv_delta; | 107 | |
| 105 | 108 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | |
| 106 | int mark_cpu_present(int pkg, int core, int cpu) | 109 | #define EVEN_COUNTERS thread_even, core_even, package_even |
| 110 | |||
| 111 | #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ | ||
| 112 | (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ | ||
| 113 | topo.num_threads_per_core + \ | ||
| 114 | (core_no) * topo.num_threads_per_core + (thread_no)) | ||
| 115 | #define GET_CORE(core_base, core_no, pkg_no) \ | ||
| 116 | (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) | ||
| 117 | #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) | ||
| 118 | |||
| 119 | struct system_summary { | ||
| 120 | struct thread_data threads; | ||
| 121 | struct core_data cores; | ||
| 122 | struct pkg_data packages; | ||
| 123 | } sum, average; | ||
| 124 | |||
| 125 | |||
| 126 | struct topo_params { | ||
| 127 | int num_packages; | ||
| 128 | int num_cpus; | ||
| 129 | int num_cores; | ||
| 130 | int max_cpu_num; | ||
| 131 | int num_cores_per_pkg; | ||
| 132 | int num_threads_per_core; | ||
| 133 | } topo; | ||
| 134 | |||
| 135 | struct timeval tv_even, tv_odd, tv_delta; | ||
| 136 | |||
| 137 | void setup_all_buffers(void); | ||
| 138 | |||
| 139 | int cpu_is_not_present(int cpu) | ||
| 107 | { | 140 | { |
| 108 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); | 141 | return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); |
| 109 | return 0; | ||
| 110 | } | 142 | } |
| 111 | |||
| 112 | /* | 143 | /* |
| 113 | * cpu_mask_init(ncpus) | 144 | * run func(thread, core, package) in topology order |
| 114 | * | 145 | * skip non-present cpus |
| 115 | * allocate and clear cpu_mask | ||
| 116 | * set cpu_mask_size | ||
| 117 | */ | 146 | */ |
| 118 | void cpu_mask_init(int ncpus) | 147 | |
| 148 | int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), | ||
| 149 | struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) | ||
| 119 | { | 150 | { |
| 120 | cpu_mask = CPU_ALLOC(ncpus); | 151 | int retval, pkg_no, core_no, thread_no; |
| 121 | if (cpu_mask == NULL) { | ||
| 122 | perror("CPU_ALLOC"); | ||
| 123 | exit(3); | ||
| 124 | } | ||
| 125 | cpu_mask_size = CPU_ALLOC_SIZE(ncpus); | ||
| 126 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | ||
| 127 | 152 | ||
| 128 | /* | 153 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { |
| 129 | * Allocate and initialize cpu_present_set | 154 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { |
| 130 | */ | 155 | for (thread_no = 0; thread_no < |
| 131 | cpu_present_set = CPU_ALLOC(ncpus); | 156 | topo.num_threads_per_core; ++thread_no) { |
| 132 | if (cpu_present_set == NULL) { | 157 | struct thread_data *t; |
| 133 | perror("CPU_ALLOC"); | 158 | struct core_data *c; |
| 134 | exit(3); | 159 | struct pkg_data *p; |
| 135 | } | ||
| 136 | cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); | ||
| 137 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
| 138 | for_all_cpus(mark_cpu_present); | ||
| 139 | } | ||
| 140 | 160 | ||
| 141 | void cpu_mask_uninit() | 161 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); |
| 142 | { | 162 | |
| 143 | CPU_FREE(cpu_mask); | 163 | if (cpu_is_not_present(t->cpu_id)) |
| 144 | cpu_mask = NULL; | 164 | continue; |
| 145 | cpu_mask_size = 0; | 165 | |
| 146 | CPU_FREE(cpu_present_set); | 166 | c = GET_CORE(core_base, core_no, pkg_no); |
| 147 | cpu_present_set = NULL; | 167 | p = GET_PKG(pkg_base, pkg_no); |
| 148 | cpu_present_setsize = 0; | 168 | |
| 169 | retval = func(t, c, p); | ||
| 170 | if (retval) | ||
| 171 | return retval; | ||
| 172 | } | ||
| 173 | } | ||
| 174 | } | ||
| 175 | return 0; | ||
| 149 | } | 176 | } |
| 150 | 177 | ||
| 151 | int cpu_migrate(int cpu) | 178 | int cpu_migrate(int cpu) |
| 152 | { | 179 | { |
| 153 | CPU_ZERO_S(cpu_mask_size, cpu_mask); | 180 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); |
| 154 | CPU_SET_S(cpu, cpu_mask_size, cpu_mask); | 181 | CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); |
| 155 | if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) | 182 | if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) |
| 156 | return -1; | 183 | return -1; |
| 157 | else | 184 | else |
| 158 | return 0; | 185 | return 0; |
| @@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
| 181 | void print_header(void) | 208 | void print_header(void) |
| 182 | { | 209 | { |
| 183 | if (show_pkg) | 210 | if (show_pkg) |
| 184 | fprintf(stderr, "pk"); | 211 | outp += sprintf(outp, "pk"); |
| 185 | if (show_pkg) | 212 | if (show_pkg) |
| 186 | fprintf(stderr, " "); | 213 | outp += sprintf(outp, " "); |
| 187 | if (show_core) | 214 | if (show_core) |
| 188 | fprintf(stderr, "cor"); | 215 | outp += sprintf(outp, "cor"); |
| 189 | if (show_cpu) | 216 | if (show_cpu) |
| 190 | fprintf(stderr, " CPU"); | 217 | outp += sprintf(outp, " CPU"); |
| 191 | if (show_pkg || show_core || show_cpu) | 218 | if (show_pkg || show_core || show_cpu) |
| 192 | fprintf(stderr, " "); | 219 | outp += sprintf(outp, " "); |
| 193 | if (do_nhm_cstates) | 220 | if (do_nhm_cstates) |
| 194 | fprintf(stderr, " %%c0"); | 221 | outp += sprintf(outp, " %%c0"); |
| 195 | if (has_aperf) | 222 | if (has_aperf) |
| 196 | fprintf(stderr, " GHz"); | 223 | outp += sprintf(outp, " GHz"); |
| 197 | fprintf(stderr, " TSC"); | 224 | outp += sprintf(outp, " TSC"); |
| 198 | if (do_nhm_cstates) | 225 | if (do_nhm_cstates) |
| 199 | fprintf(stderr, " %%c1"); | 226 | outp += sprintf(outp, " %%c1"); |
| 200 | if (do_nhm_cstates) | 227 | if (do_nhm_cstates) |
| 201 | fprintf(stderr, " %%c3"); | 228 | outp += sprintf(outp, " %%c3"); |
| 202 | if (do_nhm_cstates) | 229 | if (do_nhm_cstates) |
| 203 | fprintf(stderr, " %%c6"); | 230 | outp += sprintf(outp, " %%c6"); |
| 204 | if (do_snb_cstates) | 231 | if (do_snb_cstates) |
| 205 | fprintf(stderr, " %%c7"); | 232 | outp += sprintf(outp, " %%c7"); |
| 206 | if (do_snb_cstates) | 233 | if (do_snb_cstates) |
| 207 | fprintf(stderr, " %%pc2"); | 234 | outp += sprintf(outp, " %%pc2"); |
| 208 | if (do_nhm_cstates) | 235 | if (do_nhm_cstates) |
| 209 | fprintf(stderr, " %%pc3"); | 236 | outp += sprintf(outp, " %%pc3"); |
| 210 | if (do_nhm_cstates) | 237 | if (do_nhm_cstates) |
| 211 | fprintf(stderr, " %%pc6"); | 238 | outp += sprintf(outp, " %%pc6"); |
| 212 | if (do_snb_cstates) | 239 | if (do_snb_cstates) |
| 213 | fprintf(stderr, " %%pc7"); | 240 | outp += sprintf(outp, " %%pc7"); |
| 214 | if (extra_msr_offset) | 241 | if (extra_msr_offset) |
| 215 | fprintf(stderr, " MSR 0x%x ", extra_msr_offset); | 242 | outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); |
| 216 | 243 | ||
| 217 | putc('\n', stderr); | 244 | outp += sprintf(outp, "\n"); |
| 218 | } | 245 | } |
| 219 | 246 | ||
| 220 | void dump_cnt(struct counters *cnt) | 247 | int dump_counters(struct thread_data *t, struct core_data *c, |
| 248 | struct pkg_data *p) | ||
| 221 | { | 249 | { |
| 222 | if (!cnt) | 250 | fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); |
| 223 | return; | 251 | |
| 224 | if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); | 252 | if (t) { |
| 225 | if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); | 253 | fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); |
| 226 | if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); | 254 | fprintf(stderr, "TSC: %016llX\n", t->tsc); |
| 227 | if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); | 255 | fprintf(stderr, "aperf: %016llX\n", t->aperf); |
| 228 | if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); | 256 | fprintf(stderr, "mperf: %016llX\n", t->mperf); |
| 229 | if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); | 257 | fprintf(stderr, "c1: %016llX\n", t->c1); |
| 230 | if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); | 258 | fprintf(stderr, "msr0x%x: %016llX\n", |
| 231 | if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); | 259 | extra_msr_offset, t->extra_msr); |
| 232 | if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); | 260 | } |
| 233 | if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3); | ||
| 234 | if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6); | ||
| 235 | if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7); | ||
| 236 | if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); | ||
| 237 | } | ||
| 238 | 261 | ||
| 239 | void dump_list(struct counters *cnt) | 262 | if (c) { |
| 240 | { | 263 | fprintf(stderr, "core: %d\n", c->core_id); |
| 241 | printf("dump_list 0x%p\n", cnt); | 264 | fprintf(stderr, "c3: %016llX\n", c->c3); |
| 265 | fprintf(stderr, "c6: %016llX\n", c->c6); | ||
| 266 | fprintf(stderr, "c7: %016llX\n", c->c7); | ||
| 267 | } | ||
| 242 | 268 | ||
| 243 | for (; cnt; cnt = cnt->next) | 269 | if (p) { |
| 244 | dump_cnt(cnt); | 270 | fprintf(stderr, "package: %d\n", p->package_id); |
| 271 | fprintf(stderr, "pc2: %016llX\n", p->pc2); | ||
| 272 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | ||
| 273 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | ||
| 274 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | ||
| 275 | } | ||
| 276 | return 0; | ||
| 245 | } | 277 | } |
| 246 | 278 | ||
| 247 | /* | 279 | /* |
| @@ -253,321 +285,385 @@ void dump_list(struct counters *cnt) | |||
| 253 | * TSC: "TSC" 3 columns %3.2 | 285 | * TSC: "TSC" 3 columns %3.2 |
| 254 | * percentage " %pc3" %6.2 | 286 | * percentage " %pc3" %6.2 |
| 255 | */ | 287 | */ |
| 256 | void print_cnt(struct counters *p) | 288 | int format_counters(struct thread_data *t, struct core_data *c, |
| 289 | struct pkg_data *p) | ||
| 257 | { | 290 | { |
| 258 | double interval_float; | 291 | double interval_float; |
| 259 | 292 | ||
| 293 | /* if showing only 1st thread in core and this isn't one, bail out */ | ||
| 294 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
| 295 | return 0; | ||
| 296 | |||
| 297 | /* if showing only 1st thread in pkg and this isn't one, bail out */ | ||
| 298 | if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 299 | return 0; | ||
| 300 | |||
| 260 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; | 301 | interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; |
| 261 | 302 | ||
| 262 | /* topology columns, print blanks on 1st (average) line */ | 303 | /* topo columns, print blanks on 1st (average) line */ |
| 263 | if (p == cnt_average) { | 304 | if (t == &average.threads) { |
| 264 | if (show_pkg) | 305 | if (show_pkg) |
| 265 | fprintf(stderr, " "); | 306 | outp += sprintf(outp, " "); |
| 266 | if (show_pkg && show_core) | 307 | if (show_pkg && show_core) |
| 267 | fprintf(stderr, " "); | 308 | outp += sprintf(outp, " "); |
| 268 | if (show_core) | 309 | if (show_core) |
| 269 | fprintf(stderr, " "); | 310 | outp += sprintf(outp, " "); |
| 270 | if (show_cpu) | 311 | if (show_cpu) |
| 271 | fprintf(stderr, " " " "); | 312 | outp += sprintf(outp, " " " "); |
| 272 | } else { | 313 | } else { |
| 273 | if (show_pkg) | 314 | if (show_pkg) { |
| 274 | fprintf(stderr, "%2d", p->pkg); | 315 | if (p) |
| 316 | outp += sprintf(outp, "%2d", p->package_id); | ||
| 317 | else | ||
| 318 | outp += sprintf(outp, " "); | ||
| 319 | } | ||
| 275 | if (show_pkg && show_core) | 320 | if (show_pkg && show_core) |
| 276 | fprintf(stderr, " "); | 321 | outp += sprintf(outp, " "); |
| 277 | if (show_core) | 322 | if (show_core) { |
| 278 | fprintf(stderr, "%3d", p->core); | 323 | if (c) |
| 324 | outp += sprintf(outp, "%3d", c->core_id); | ||
| 325 | else | ||
| 326 | outp += sprintf(outp, " "); | ||
| 327 | } | ||
| 279 | if (show_cpu) | 328 | if (show_cpu) |
| 280 | fprintf(stderr, " %3d", p->cpu); | 329 | outp += sprintf(outp, " %3d", t->cpu_id); |
| 281 | } | 330 | } |
| 282 | 331 | ||
| 283 | /* %c0 */ | 332 | /* %c0 */ |
| 284 | if (do_nhm_cstates) { | 333 | if (do_nhm_cstates) { |
| 285 | if (show_pkg || show_core || show_cpu) | 334 | if (show_pkg || show_core || show_cpu) |
| 286 | fprintf(stderr, " "); | 335 | outp += sprintf(outp, " "); |
| 287 | if (!skip_c0) | 336 | if (!skip_c0) |
| 288 | fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); | 337 | outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); |
| 289 | else | 338 | else |
| 290 | fprintf(stderr, " ****"); | 339 | outp += sprintf(outp, " ****"); |
| 291 | } | 340 | } |
| 292 | 341 | ||
| 293 | /* GHz */ | 342 | /* GHz */ |
| 294 | if (has_aperf) { | 343 | if (has_aperf) { |
| 295 | if (!aperf_mperf_unstable) { | 344 | if (!aperf_mperf_unstable) { |
| 296 | fprintf(stderr, " %3.2f", | 345 | outp += sprintf(outp, " %3.2f", |
| 297 | 1.0 * p->tsc / units * p->aperf / | 346 | 1.0 * t->tsc / units * t->aperf / |
| 298 | p->mperf / interval_float); | 347 | t->mperf / interval_float); |
| 299 | } else { | 348 | } else { |
| 300 | if (p->aperf > p->tsc || p->mperf > p->tsc) { | 349 | if (t->aperf > t->tsc || t->mperf > t->tsc) { |
| 301 | fprintf(stderr, " ***"); | 350 | outp += sprintf(outp, " ***"); |
| 302 | } else { | 351 | } else { |
| 303 | fprintf(stderr, "%3.1f*", | 352 | outp += sprintf(outp, "%3.1f*", |
| 304 | 1.0 * p->tsc / | 353 | 1.0 * t->tsc / |
| 305 | units * p->aperf / | 354 | units * t->aperf / |
| 306 | p->mperf / interval_float); | 355 | t->mperf / interval_float); |
| 307 | } | 356 | } |
| 308 | } | 357 | } |
| 309 | } | 358 | } |
| 310 | 359 | ||
| 311 | /* TSC */ | 360 | /* TSC */ |
| 312 | fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); | 361 | outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); |
| 313 | 362 | ||
| 314 | if (do_nhm_cstates) { | 363 | if (do_nhm_cstates) { |
| 315 | if (!skip_c1) | 364 | if (!skip_c1) |
| 316 | fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); | 365 | outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); |
| 317 | else | 366 | else |
| 318 | fprintf(stderr, " ****"); | 367 | outp += sprintf(outp, " ****"); |
| 319 | } | 368 | } |
| 369 | |||
| 370 | /* print per-core data only for 1st thread in core */ | ||
| 371 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
| 372 | goto done; | ||
| 373 | |||
| 320 | if (do_nhm_cstates) | 374 | if (do_nhm_cstates) |
| 321 | fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); | 375 | outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); |
| 322 | if (do_nhm_cstates) | 376 | if (do_nhm_cstates) |
| 323 | fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); | 377 | outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); |
| 324 | if (do_snb_cstates) | 378 | if (do_snb_cstates) |
| 325 | fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); | 379 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
| 380 | |||
| 381 | /* print per-package data only for 1st core in package */ | ||
| 382 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 383 | goto done; | ||
| 384 | |||
| 326 | if (do_snb_cstates) | 385 | if (do_snb_cstates) |
| 327 | fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); | 386 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
| 328 | if (do_nhm_cstates) | 387 | if (do_nhm_cstates) |
| 329 | fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); | 388 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); |
| 330 | if (do_nhm_cstates) | 389 | if (do_nhm_cstates) |
| 331 | fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); | 390 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
| 332 | if (do_snb_cstates) | 391 | if (do_snb_cstates) |
| 333 | fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); | 392 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
| 393 | done: | ||
| 334 | if (extra_msr_offset) | 394 | if (extra_msr_offset) |
| 335 | fprintf(stderr, " 0x%016llx", p->extra_msr); | 395 | outp += sprintf(outp, " 0x%016llx", t->extra_msr); |
| 336 | putc('\n', stderr); | 396 | outp += sprintf(outp, "\n"); |
| 397 | |||
| 398 | return 0; | ||
| 337 | } | 399 | } |
| 338 | 400 | ||
| 339 | void print_counters(struct counters *counters) | 401 | void flush_stdout() |
| 402 | { | ||
| 403 | fputs(output_buffer, stdout); | ||
| 404 | outp = output_buffer; | ||
| 405 | } | ||
| 406 | void flush_stderr() | ||
| 407 | { | ||
| 408 | fputs(output_buffer, stderr); | ||
| 409 | outp = output_buffer; | ||
| 410 | } | ||
| 411 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 340 | { | 412 | { |
| 341 | struct counters *cnt; | ||
| 342 | static int printed; | 413 | static int printed; |
| 343 | 414 | ||
| 344 | |||
| 345 | if (!printed || !summary_only) | 415 | if (!printed || !summary_only) |
| 346 | print_header(); | 416 | print_header(); |
| 347 | 417 | ||
| 348 | if (num_cpus > 1) | 418 | if (topo.num_cpus > 1) |
| 349 | print_cnt(cnt_average); | 419 | format_counters(&average.threads, &average.cores, |
| 420 | &average.packages); | ||
| 350 | 421 | ||
| 351 | printed = 1; | 422 | printed = 1; |
| 352 | 423 | ||
| 353 | if (summary_only) | 424 | if (summary_only) |
| 354 | return; | 425 | return; |
| 355 | 426 | ||
| 356 | for (cnt = counters; cnt != NULL; cnt = cnt->next) | 427 | for_all_cpus(format_counters, t, c, p); |
| 357 | print_cnt(cnt); | ||
| 358 | |||
| 359 | } | 428 | } |
| 360 | 429 | ||
| 361 | #define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) | 430 | void |
| 431 | delta_package(struct pkg_data *new, struct pkg_data *old) | ||
| 432 | { | ||
| 433 | old->pc2 = new->pc2 - old->pc2; | ||
| 434 | old->pc3 = new->pc3 - old->pc3; | ||
| 435 | old->pc6 = new->pc6 - old->pc6; | ||
| 436 | old->pc7 = new->pc7 - old->pc7; | ||
| 437 | } | ||
| 362 | 438 | ||
| 363 | int compute_delta(struct counters *after, | 439 | void |
| 364 | struct counters *before, struct counters *delta) | 440 | delta_core(struct core_data *new, struct core_data *old) |
| 365 | { | 441 | { |
| 366 | int errors = 0; | 442 | old->c3 = new->c3 - old->c3; |
| 367 | int perf_err = 0; | 443 | old->c6 = new->c6 - old->c6; |
| 444 | old->c7 = new->c7 - old->c7; | ||
| 445 | } | ||
| 368 | 446 | ||
| 369 | skip_c0 = skip_c1 = 0; | 447 | void |
| 448 | delta_thread(struct thread_data *new, struct thread_data *old, | ||
| 449 | struct core_data *core_delta) | ||
| 450 | { | ||
| 451 | old->tsc = new->tsc - old->tsc; | ||
| 452 | |||
| 453 | /* check for TSC < 1 Mcycles over interval */ | ||
| 454 | if (old->tsc < (1000 * 1000)) { | ||
| 455 | fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); | ||
| 456 | fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); | ||
| 457 | fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); | ||
| 458 | exit(-3); | ||
| 459 | } | ||
| 370 | 460 | ||
| 371 | for ( ; after && before && delta; | 461 | old->c1 = new->c1 - old->c1; |
| 372 | after = after->next, before = before->next, delta = delta->next) { | ||
| 373 | if (before->cpu != after->cpu) { | ||
| 374 | printf("cpu configuration changed: %d != %d\n", | ||
| 375 | before->cpu, after->cpu); | ||
| 376 | return -1; | ||
| 377 | } | ||
| 378 | 462 | ||
| 379 | if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { | 463 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
| 380 | fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", | 464 | old->aperf = new->aperf - old->aperf; |
| 381 | before->cpu, before->tsc, after->tsc); | 465 | old->mperf = new->mperf - old->mperf; |
| 382 | errors++; | 466 | } else { |
| 383 | } | ||
| 384 | /* check for TSC < 1 Mcycles over interval */ | ||
| 385 | if (delta->tsc < (1000 * 1000)) { | ||
| 386 | fprintf(stderr, "Insanely slow TSC rate," | ||
| 387 | " TSC stops in idle?\n"); | ||
| 388 | fprintf(stderr, "You can disable all c-states" | ||
| 389 | " by booting with \"idle=poll\"\n"); | ||
| 390 | fprintf(stderr, "or just the deep ones with" | ||
| 391 | " \"processor.max_cstate=1\"\n"); | ||
| 392 | exit(-3); | ||
| 393 | } | ||
| 394 | if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { | ||
| 395 | fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", | ||
| 396 | before->cpu, before->c3, after->c3); | ||
| 397 | errors++; | ||
| 398 | } | ||
| 399 | if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { | ||
| 400 | fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", | ||
| 401 | before->cpu, before->c6, after->c6); | ||
| 402 | errors++; | ||
| 403 | } | ||
| 404 | if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { | ||
| 405 | fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", | ||
| 406 | before->cpu, before->c7, after->c7); | ||
| 407 | errors++; | ||
| 408 | } | ||
| 409 | if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { | ||
| 410 | fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", | ||
| 411 | before->cpu, before->pc2, after->pc2); | ||
| 412 | errors++; | ||
| 413 | } | ||
| 414 | if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { | ||
| 415 | fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", | ||
| 416 | before->cpu, before->pc3, after->pc3); | ||
| 417 | errors++; | ||
| 418 | } | ||
| 419 | if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { | ||
| 420 | fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", | ||
| 421 | before->cpu, before->pc6, after->pc6); | ||
| 422 | errors++; | ||
| 423 | } | ||
| 424 | if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { | ||
| 425 | fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", | ||
| 426 | before->cpu, before->pc7, after->pc7); | ||
| 427 | errors++; | ||
| 428 | } | ||
| 429 | 467 | ||
| 430 | perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); | 468 | if (!aperf_mperf_unstable) { |
| 431 | if (perf_err) { | 469 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
| 432 | fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", | 470 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
| 433 | before->cpu, before->aperf, after->aperf); | 471 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
| 434 | } | ||
| 435 | perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); | ||
| 436 | if (perf_err) { | ||
| 437 | fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", | ||
| 438 | before->cpu, before->mperf, after->mperf); | ||
| 439 | } | ||
| 440 | if (perf_err) { | ||
| 441 | if (!aperf_mperf_unstable) { | ||
| 442 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | ||
| 443 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | ||
| 444 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | ||
| 445 | 472 | ||
| 446 | aperf_mperf_unstable = 1; | 473 | aperf_mperf_unstable = 1; |
| 447 | } | ||
| 448 | /* | ||
| 449 | * mperf delta is likely a huge "positive" number | ||
| 450 | * can not use it for calculating c0 time | ||
| 451 | */ | ||
| 452 | skip_c0 = 1; | ||
| 453 | skip_c1 = 1; | ||
| 454 | } | 474 | } |
| 455 | |||
| 456 | /* | 475 | /* |
| 457 | * As mperf and tsc collection are not atomic, | 476 | * mperf delta is likely a huge "positive" number |
| 458 | * it is possible for mperf's non-halted cycles | 477 | * can not use it for calculating c0 time |
| 459 | * to exceed TSC's all cycles: show c1 = 0% in that case. | ||
| 460 | */ | 478 | */ |
| 461 | if (delta->mperf > delta->tsc) | 479 | skip_c0 = 1; |
| 462 | delta->c1 = 0; | 480 | skip_c1 = 1; |
| 463 | else /* normal case, derive c1 */ | 481 | } |
| 464 | delta->c1 = delta->tsc - delta->mperf | ||
| 465 | - delta->c3 - delta->c6 - delta->c7; | ||
| 466 | 482 | ||
| 467 | if (delta->mperf == 0) | ||
| 468 | delta->mperf = 1; /* divide by 0 protection */ | ||
| 469 | 483 | ||
| 470 | /* | 484 | /* |
| 471 | * for "extra msr", just copy the latest w/o subtracting | 485 | * As mperf and tsc collection are not atomic, |
| 472 | */ | 486 | * it is possible for mperf's non-halted cycles |
| 473 | delta->extra_msr = after->extra_msr; | 487 | * to exceed TSC's all cycles: show c1 = 0% in that case. |
| 474 | if (errors) { | 488 | */ |
| 475 | fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); | 489 | if (old->mperf > old->tsc) |
| 476 | dump_cnt(before); | 490 | old->c1 = 0; |
| 477 | fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); | 491 | else { |
| 478 | dump_cnt(after); | 492 | /* normal case, derive c1 */ |
| 479 | errors = 0; | 493 | old->c1 = old->tsc - old->mperf - core_delta->c3 |
| 480 | } | 494 | - core_delta->c6 - core_delta->c7; |
| 495 | } | ||
| 496 | if (old->mperf == 0) { | ||
| 497 | if (verbose) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | ||
| 498 | old->mperf = 1; /* divide by 0 protection */ | ||
| 481 | } | 499 | } |
| 500 | |||
| 501 | /* | ||
| 502 | * for "extra msr", just copy the latest w/o subtracting | ||
| 503 | */ | ||
| 504 | old->extra_msr = new->extra_msr; | ||
| 505 | } | ||
| 506 | |||
| 507 | int delta_cpu(struct thread_data *t, struct core_data *c, | ||
| 508 | struct pkg_data *p, struct thread_data *t2, | ||
| 509 | struct core_data *c2, struct pkg_data *p2) | ||
| 510 | { | ||
| 511 | /* calculate core delta only for 1st thread in core */ | ||
| 512 | if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) | ||
| 513 | delta_core(c, c2); | ||
| 514 | |||
| 515 | /* always calculate thread delta */ | ||
| 516 | delta_thread(t, t2, c2); /* c2 is core delta */ | ||
| 517 | |||
| 518 | /* calculate package delta only for 1st core in package */ | ||
| 519 | if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) | ||
| 520 | delta_package(p, p2); | ||
| 521 | |||
| 482 | return 0; | 522 | return 0; |
| 483 | } | 523 | } |
| 484 | 524 | ||
| 485 | void compute_average(struct counters *delta, struct counters *avg) | 525 | void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
| 526 | { | ||
| 527 | t->tsc = 0; | ||
| 528 | t->aperf = 0; | ||
| 529 | t->mperf = 0; | ||
| 530 | t->c1 = 0; | ||
| 531 | |||
| 532 | /* tells format_counters to dump all fields from this set */ | ||
| 533 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
| 534 | |||
| 535 | c->c3 = 0; | ||
| 536 | c->c6 = 0; | ||
| 537 | c->c7 = 0; | ||
| 538 | |||
| 539 | p->pc2 = 0; | ||
| 540 | p->pc3 = 0; | ||
| 541 | p->pc6 = 0; | ||
| 542 | p->pc7 = 0; | ||
| 543 | } | ||
| 544 | int sum_counters(struct thread_data *t, struct core_data *c, | ||
| 545 | struct pkg_data *p) | ||
| 486 | { | 546 | { |
| 487 | struct counters *sum; | 547 | average.threads.tsc += t->tsc; |
| 548 | average.threads.aperf += t->aperf; | ||
| 549 | average.threads.mperf += t->mperf; | ||
| 550 | average.threads.c1 += t->c1; | ||
| 488 | 551 | ||
| 489 | sum = calloc(1, sizeof(struct counters)); | 552 | /* sum per-core values only for 1st thread in core */ |
| 490 | if (sum == NULL) { | 553 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
| 491 | perror("calloc sum"); | 554 | return 0; |
| 492 | exit(1); | ||
| 493 | } | ||
| 494 | 555 | ||
| 495 | for (; delta; delta = delta->next) { | 556 | average.cores.c3 += c->c3; |
| 496 | sum->tsc += delta->tsc; | 557 | average.cores.c6 += c->c6; |
| 497 | sum->c1 += delta->c1; | 558 | average.cores.c7 += c->c7; |
| 498 | sum->c3 += delta->c3; | 559 | |
| 499 | sum->c6 += delta->c6; | 560 | /* sum per-pkg values only for 1st core in pkg */ |
| 500 | sum->c7 += delta->c7; | 561 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
| 501 | sum->aperf += delta->aperf; | 562 | return 0; |
| 502 | sum->mperf += delta->mperf; | 563 | |
| 503 | sum->pc2 += delta->pc2; | 564 | average.packages.pc2 += p->pc2; |
| 504 | sum->pc3 += delta->pc3; | 565 | average.packages.pc3 += p->pc3; |
| 505 | sum->pc6 += delta->pc6; | 566 | average.packages.pc6 += p->pc6; |
| 506 | sum->pc7 += delta->pc7; | 567 | average.packages.pc7 += p->pc7; |
| 507 | } | 568 | |
| 508 | avg->tsc = sum->tsc/num_cpus; | 569 | return 0; |
| 509 | avg->c1 = sum->c1/num_cpus; | 570 | } |
| 510 | avg->c3 = sum->c3/num_cpus; | 571 | /* |
| 511 | avg->c6 = sum->c6/num_cpus; | 572 | * sum the counters for all cpus in the system |
| 512 | avg->c7 = sum->c7/num_cpus; | 573 | * compute the weighted average |
| 513 | avg->aperf = sum->aperf/num_cpus; | 574 | */ |
| 514 | avg->mperf = sum->mperf/num_cpus; | 575 | void compute_average(struct thread_data *t, struct core_data *c, |
| 515 | avg->pc2 = sum->pc2/num_cpus; | 576 | struct pkg_data *p) |
| 516 | avg->pc3 = sum->pc3/num_cpus; | 577 | { |
| 517 | avg->pc6 = sum->pc6/num_cpus; | 578 | clear_counters(&average.threads, &average.cores, &average.packages); |
| 518 | avg->pc7 = sum->pc7/num_cpus; | 579 | |
| 519 | 580 | for_all_cpus(sum_counters, t, c, p); | |
| 520 | free(sum); | 581 | |
| 582 | average.threads.tsc /= topo.num_cpus; | ||
| 583 | average.threads.aperf /= topo.num_cpus; | ||
| 584 | average.threads.mperf /= topo.num_cpus; | ||
| 585 | average.threads.c1 /= topo.num_cpus; | ||
| 586 | |||
| 587 | average.cores.c3 /= topo.num_cores; | ||
| 588 | average.cores.c6 /= topo.num_cores; | ||
| 589 | average.cores.c7 /= topo.num_cores; | ||
| 590 | |||
| 591 | average.packages.pc2 /= topo.num_packages; | ||
| 592 | average.packages.pc3 /= topo.num_packages; | ||
| 593 | average.packages.pc6 /= topo.num_packages; | ||
| 594 | average.packages.pc7 /= topo.num_packages; | ||
| 521 | } | 595 | } |
| 522 | 596 | ||
| 523 | int get_counters(struct counters *cnt) | 597 | static unsigned long long rdtsc(void) |
| 524 | { | 598 | { |
| 525 | for ( ; cnt; cnt = cnt->next) { | 599 | unsigned int low, high; |
| 526 | 600 | ||
| 527 | if (cpu_migrate(cnt->cpu)) | 601 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); |
| 528 | return -1; | ||
| 529 | 602 | ||
| 530 | if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) | 603 | return low | ((unsigned long long)high) << 32; |
| 531 | return -1; | 604 | } |
| 532 | 605 | ||
| 533 | if (has_aperf) { | ||
| 534 | if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) | ||
| 535 | return -1; | ||
| 536 | if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) | ||
| 537 | return -1; | ||
| 538 | } | ||
| 539 | 606 | ||
| 540 | if (do_nhm_cstates) { | 607 | /* |
| 541 | if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) | 608 | * get_counters(...) |
| 542 | return -1; | 609 | * migrate to cpu |
| 543 | if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) | 610 | * acquire and record local counters for that cpu |
| 544 | return -1; | 611 | */ |
| 545 | } | 612 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
| 613 | { | ||
| 614 | int cpu = t->cpu_id; | ||
| 546 | 615 | ||
| 547 | if (do_snb_cstates) | 616 | if (cpu_migrate(cpu)) |
| 548 | if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) | 617 | return -1; |
| 549 | return -1; | ||
| 550 | 618 | ||
| 551 | if (do_nhm_cstates) { | 619 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
| 552 | if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) | 620 | |
| 553 | return -1; | 621 | if (has_aperf) { |
| 554 | if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) | 622 | if (get_msr(cpu, MSR_APERF, &t->aperf)) |
| 555 | return -1; | 623 | return -3; |
| 556 | } | 624 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) |
| 557 | if (do_snb_cstates) { | 625 | return -4; |
| 558 | if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) | 626 | } |
| 559 | return -1; | 627 | |
| 560 | if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) | 628 | if (extra_msr_offset) |
| 561 | return -1; | 629 | if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) |
| 562 | } | 630 | return -5; |
| 563 | if (extra_msr_offset) | 631 | |
| 564 | if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) | 632 | /* collect core counters only for 1st thread in core */ |
| 565 | return -1; | 633 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
| 634 | return 0; | ||
| 635 | |||
| 636 | if (do_nhm_cstates) { | ||
| 637 | if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) | ||
| 638 | return -6; | ||
| 639 | if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) | ||
| 640 | return -7; | ||
| 641 | } | ||
| 642 | |||
| 643 | if (do_snb_cstates) | ||
| 644 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | ||
| 645 | return -8; | ||
| 646 | |||
| 647 | /* collect package counters only for 1st core in package */ | ||
| 648 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 649 | return 0; | ||
| 650 | |||
| 651 | if (do_nhm_cstates) { | ||
| 652 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) | ||
| 653 | return -9; | ||
| 654 | if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) | ||
| 655 | return -10; | ||
| 656 | } | ||
| 657 | if (do_snb_cstates) { | ||
| 658 | if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) | ||
| 659 | return -11; | ||
| 660 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | ||
| 661 | return -12; | ||
| 566 | } | 662 | } |
| 567 | return 0; | 663 | return 0; |
| 568 | } | 664 | } |
| 569 | 665 | ||
| 570 | void print_nehalem_info(void) | 666 | void print_verbose_header(void) |
| 571 | { | 667 | { |
| 572 | unsigned long long msr; | 668 | unsigned long long msr; |
| 573 | unsigned int ratio; | 669 | unsigned int ratio; |
| @@ -615,143 +711,82 @@ void print_nehalem_info(void) | |||
| 615 | 711 | ||
| 616 | } | 712 | } |
| 617 | 713 | ||
| 618 | void free_counter_list(struct counters *list) | 714 | void free_all_buffers(void) |
| 619 | { | 715 | { |
| 620 | struct counters *p; | 716 | CPU_FREE(cpu_present_set); |
| 717 | cpu_present_set = NULL; | ||
| 718 | cpu_present_set = 0; | ||
| 621 | 719 | ||
| 622 | for (p = list; p; ) { | 720 | CPU_FREE(cpu_affinity_set); |
| 623 | struct counters *free_me; | 721 | cpu_affinity_set = NULL; |
| 722 | cpu_affinity_setsize = 0; | ||
| 624 | 723 | ||
| 625 | free_me = p; | 724 | free(thread_even); |
| 626 | p = p->next; | 725 | free(core_even); |
| 627 | free(free_me); | 726 | free(package_even); |
| 628 | } | ||
| 629 | } | ||
| 630 | 727 | ||
| 631 | void free_all_counters(void) | 728 | thread_even = NULL; |
| 632 | { | 729 | core_even = NULL; |
| 633 | free_counter_list(cnt_even); | 730 | package_even = NULL; |
| 634 | cnt_even = NULL; | ||
| 635 | 731 | ||
| 636 | free_counter_list(cnt_odd); | 732 | free(thread_odd); |
| 637 | cnt_odd = NULL; | 733 | free(core_odd); |
| 734 | free(package_odd); | ||
| 638 | 735 | ||
| 639 | free_counter_list(cnt_delta); | 736 | thread_odd = NULL; |
| 640 | cnt_delta = NULL; | 737 | core_odd = NULL; |
| 738 | package_odd = NULL; | ||
| 641 | 739 | ||
| 642 | free_counter_list(cnt_average); | 740 | free(output_buffer); |
| 643 | cnt_average = NULL; | 741 | output_buffer = NULL; |
| 742 | outp = NULL; | ||
| 644 | } | 743 | } |
| 645 | 744 | ||
| 646 | void insert_counters(struct counters **list, | 745 | /* |
| 647 | struct counters *new) | 746 | * cpu_is_first_sibling_in_core(cpu) |
| 747 | * return 1 if given CPU is 1st HT sibling in the core | ||
| 748 | */ | ||
| 749 | int cpu_is_first_sibling_in_core(int cpu) | ||
| 648 | { | 750 | { |
| 649 | struct counters *prev; | 751 | char path[64]; |
| 650 | 752 | FILE *filep; | |
| 651 | /* | 753 | int first_cpu; |
| 652 | * list was empty | ||
| 653 | */ | ||
| 654 | if (*list == NULL) { | ||
| 655 | new->next = *list; | ||
| 656 | *list = new; | ||
| 657 | return; | ||
| 658 | } | ||
| 659 | |||
| 660 | if (!summary_only) | ||
| 661 | show_cpu = 1; /* there is more than one CPU */ | ||
| 662 | |||
| 663 | /* | ||
| 664 | * insert on front of list. | ||
| 665 | * It is sorted by ascending package#, core#, cpu# | ||
| 666 | */ | ||
| 667 | if (((*list)->pkg > new->pkg) || | ||
| 668 | (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || | ||
| 669 | (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { | ||
| 670 | new->next = *list; | ||
| 671 | *list = new; | ||
| 672 | return; | ||
| 673 | } | ||
| 674 | |||
| 675 | prev = *list; | ||
| 676 | |||
| 677 | while (prev->next && (prev->next->pkg < new->pkg)) { | ||
| 678 | prev = prev->next; | ||
| 679 | if (!summary_only) | ||
| 680 | show_pkg = 1; /* there is more than 1 package */ | ||
| 681 | } | ||
| 682 | |||
| 683 | while (prev->next && (prev->next->pkg == new->pkg) | ||
| 684 | && (prev->next->core < new->core)) { | ||
| 685 | prev = prev->next; | ||
| 686 | if (!summary_only) | ||
| 687 | show_core = 1; /* there is more than 1 core */ | ||
| 688 | } | ||
| 689 | 754 | ||
| 690 | while (prev->next && (prev->next->pkg == new->pkg) | 755 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); |
| 691 | && (prev->next->core == new->core) | 756 | filep = fopen(path, "r"); |
| 692 | && (prev->next->cpu < new->cpu)) { | 757 | if (filep == NULL) { |
| 693 | prev = prev->next; | 758 | perror(path); |
| 759 | exit(1); | ||
| 694 | } | 760 | } |
| 695 | 761 | fscanf(filep, "%d", &first_cpu); | |
| 696 | /* | 762 | fclose(filep); |
| 697 | * insert after "prev" | 763 | return (cpu == first_cpu); |
| 698 | */ | ||
| 699 | new->next = prev->next; | ||
| 700 | prev->next = new; | ||
| 701 | } | 764 | } |
| 702 | 765 | ||
| 703 | void alloc_new_counters(int pkg, int core, int cpu) | 766 | /* |
| 767 | * cpu_is_first_core_in_package(cpu) | ||
| 768 | * return 1 if given CPU is 1st core in package | ||
| 769 | */ | ||
| 770 | int cpu_is_first_core_in_package(int cpu) | ||
| 704 | { | 771 | { |
| 705 | struct counters *new; | 772 | char path[64]; |
| 706 | 773 | FILE *filep; | |
| 707 | if (verbose > 1) | 774 | int first_cpu; |
| 708 | printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); | ||
| 709 | |||
| 710 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
| 711 | if (new == NULL) { | ||
| 712 | perror("calloc"); | ||
| 713 | exit(1); | ||
| 714 | } | ||
| 715 | new->pkg = pkg; | ||
| 716 | new->core = core; | ||
| 717 | new->cpu = cpu; | ||
| 718 | insert_counters(&cnt_odd, new); | ||
| 719 | |||
| 720 | new = (struct counters *)calloc(1, | ||
| 721 | sizeof(struct counters)); | ||
| 722 | if (new == NULL) { | ||
| 723 | perror("calloc"); | ||
| 724 | exit(1); | ||
| 725 | } | ||
| 726 | new->pkg = pkg; | ||
| 727 | new->core = core; | ||
| 728 | new->cpu = cpu; | ||
| 729 | insert_counters(&cnt_even, new); | ||
| 730 | |||
| 731 | new = (struct counters *)calloc(1, sizeof(struct counters)); | ||
| 732 | if (new == NULL) { | ||
| 733 | perror("calloc"); | ||
| 734 | exit(1); | ||
| 735 | } | ||
| 736 | new->pkg = pkg; | ||
| 737 | new->core = core; | ||
| 738 | new->cpu = cpu; | ||
| 739 | insert_counters(&cnt_delta, new); | ||
| 740 | 775 | ||
| 741 | new = (struct counters *)calloc(1, sizeof(struct counters)); | 776 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); |
| 742 | if (new == NULL) { | 777 | filep = fopen(path, "r"); |
| 743 | perror("calloc"); | 778 | if (filep == NULL) { |
| 779 | perror(path); | ||
| 744 | exit(1); | 780 | exit(1); |
| 745 | } | 781 | } |
| 746 | new->pkg = pkg; | 782 | fscanf(filep, "%d", &first_cpu); |
| 747 | new->core = core; | 783 | fclose(filep); |
| 748 | new->cpu = cpu; | 784 | return (cpu == first_cpu); |
| 749 | cnt_average = new; | ||
| 750 | } | 785 | } |
| 751 | 786 | ||
| 752 | int get_physical_package_id(int cpu) | 787 | int get_physical_package_id(int cpu) |
| 753 | { | 788 | { |
| 754 | char path[64]; | 789 | char path[80]; |
| 755 | FILE *filep; | 790 | FILE *filep; |
| 756 | int pkg; | 791 | int pkg; |
| 757 | 792 | ||
| @@ -768,7 +803,7 @@ int get_physical_package_id(int cpu) | |||
| 768 | 803 | ||
| 769 | int get_core_id(int cpu) | 804 | int get_core_id(int cpu) |
| 770 | { | 805 | { |
| 771 | char path[64]; | 806 | char path[80]; |
| 772 | FILE *filep; | 807 | FILE *filep; |
| 773 | int core; | 808 | int core; |
| 774 | 809 | ||
| @@ -783,14 +818,87 @@ int get_core_id(int cpu) | |||
| 783 | return core; | 818 | return core; |
| 784 | } | 819 | } |
| 785 | 820 | ||
| 821 | int get_num_ht_siblings(int cpu) | ||
| 822 | { | ||
| 823 | char path[80]; | ||
| 824 | FILE *filep; | ||
| 825 | int sib1, sib2; | ||
| 826 | int matches; | ||
| 827 | char character; | ||
| 828 | |||
| 829 | sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); | ||
| 830 | filep = fopen(path, "r"); | ||
| 831 | if (filep == NULL) { | ||
| 832 | perror(path); | ||
| 833 | exit(1); | ||
| 834 | } | ||
| 835 | /* | ||
| 836 | * file format: | ||
| 837 | * if a pair of numbers with a character between: 2 siblings (e.g. 1-2, or 1,4) | ||
| 838 | * otherwise 1 sibling (self). | ||
| 839 | */ | ||
| 840 | matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); | ||
| 841 | |||
| 842 | fclose(filep); | ||
| 843 | |||
| 844 | if (matches == 3) | ||
| 845 | return 2; | ||
| 846 | else | ||
| 847 | return 1; | ||
| 848 | } | ||
| 849 | |||
| 786 | /* | 850 | /* |
| 787 | * run func(pkg, core, cpu) on every cpu in /proc/stat | 851 | * run func(thread, core, package) in topology order |
| 852 | * skip non-present cpus | ||
| 788 | */ | 853 | */ |
| 789 | 854 | ||
| 790 | int for_all_cpus(void (func)(int, int, int)) | 855 | int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, |
| 856 | struct pkg_data *, struct thread_data *, struct core_data *, | ||
| 857 | struct pkg_data *), struct thread_data *thread_base, | ||
| 858 | struct core_data *core_base, struct pkg_data *pkg_base, | ||
| 859 | struct thread_data *thread_base2, struct core_data *core_base2, | ||
| 860 | struct pkg_data *pkg_base2) | ||
| 861 | { | ||
| 862 | int retval, pkg_no, core_no, thread_no; | ||
| 863 | |||
| 864 | for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { | ||
| 865 | for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { | ||
| 866 | for (thread_no = 0; thread_no < | ||
| 867 | topo.num_threads_per_core; ++thread_no) { | ||
| 868 | struct thread_data *t, *t2; | ||
| 869 | struct core_data *c, *c2; | ||
| 870 | struct pkg_data *p, *p2; | ||
| 871 | |||
| 872 | t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); | ||
| 873 | |||
| 874 | if (cpu_is_not_present(t->cpu_id)) | ||
| 875 | continue; | ||
| 876 | |||
| 877 | t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); | ||
| 878 | |||
| 879 | c = GET_CORE(core_base, core_no, pkg_no); | ||
| 880 | c2 = GET_CORE(core_base2, core_no, pkg_no); | ||
| 881 | |||
| 882 | p = GET_PKG(pkg_base, pkg_no); | ||
| 883 | p2 = GET_PKG(pkg_base2, pkg_no); | ||
| 884 | |||
| 885 | retval = func(t, c, p, t2, c2, p2); | ||
| 886 | if (retval) | ||
| 887 | return retval; | ||
| 888 | } | ||
| 889 | } | ||
| 890 | } | ||
| 891 | return 0; | ||
| 892 | } | ||
| 893 | |||
| 894 | /* | ||
| 895 | * run func(cpu) on every cpu in /proc/stat | ||
| 896 | * return max_cpu number | ||
| 897 | */ | ||
| 898 | int for_all_proc_cpus(int (func)(int)) | ||
| 791 | { | 899 | { |
| 792 | FILE *fp; | 900 | FILE *fp; |
| 793 | int cpu_count; | 901 | int cpu_num; |
| 794 | int retval; | 902 | int retval; |
| 795 | 903 | ||
| 796 | fp = fopen(proc_stat, "r"); | 904 | fp = fopen(proc_stat, "r"); |
| @@ -805,78 +913,88 @@ int for_all_cpus(void (func)(int, int, int)) | |||
| 805 | exit(1); | 913 | exit(1); |
| 806 | } | 914 | } |
| 807 | 915 | ||
| 808 | for (cpu_count = 0; ; cpu_count++) { | 916 | while (1) { |
| 809 | int cpu; | 917 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); |
| 810 | |||
| 811 | retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); | ||
| 812 | if (retval != 1) | 918 | if (retval != 1) |
| 813 | break; | 919 | break; |
| 814 | 920 | ||
| 815 | func(get_physical_package_id(cpu), get_core_id(cpu), cpu); | 921 | retval = func(cpu_num); |
| 922 | if (retval) { | ||
| 923 | fclose(fp); | ||
| 924 | return(retval); | ||
| 925 | } | ||
| 816 | } | 926 | } |
| 817 | fclose(fp); | 927 | fclose(fp); |
| 818 | return cpu_count; | 928 | return 0; |
| 819 | } | 929 | } |
| 820 | 930 | ||
| 821 | void re_initialize(void) | 931 | void re_initialize(void) |
| 822 | { | 932 | { |
| 823 | free_all_counters(); | 933 | free_all_buffers(); |
| 824 | num_cpus = for_all_cpus(alloc_new_counters); | 934 | setup_all_buffers(); |
| 825 | cpu_mask_uninit(); | 935 | printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); |
| 826 | cpu_mask_init(num_cpus); | ||
| 827 | printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); | ||
| 828 | } | 936 | } |
| 829 | 937 | ||
| 830 | void dummy(int pkg, int core, int cpu) { return; } | 938 | |
| 831 | /* | 939 | /* |
| 832 | * check to see if a cpu came on-line | 940 | * count_cpus() |
| 941 | * remember the last one seen, it will be the max | ||
| 833 | */ | 942 | */ |
| 834 | int verify_num_cpus(void) | 943 | int count_cpus(int cpu) |
| 835 | { | 944 | { |
| 836 | int new_num_cpus; | 945 | if (topo.max_cpu_num < cpu) |
| 946 | topo.max_cpu_num = cpu; | ||
| 837 | 947 | ||
| 838 | new_num_cpus = for_all_cpus(dummy); | 948 | topo.num_cpus += 1; |
| 839 | 949 | return 0; | |
| 840 | if (new_num_cpus != num_cpus) { | 950 | } |
| 841 | if (verbose) | 951 | int mark_cpu_present(int cpu) |
| 842 | printf("num_cpus was %d, is now %d\n", | 952 | { |
| 843 | num_cpus, new_num_cpus); | 953 | CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); |
| 844 | return -1; | ||
| 845 | } | ||
| 846 | return 0; | 954 | return 0; |
| 847 | } | 955 | } |
| 848 | 956 | ||
| 849 | void turbostat_loop() | 957 | void turbostat_loop() |
| 850 | { | 958 | { |
| 959 | int retval; | ||
| 960 | |||
| 851 | restart: | 961 | restart: |
| 852 | get_counters(cnt_even); | 962 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
| 963 | if (retval) { | ||
| 964 | re_initialize(); | ||
| 965 | goto restart; | ||
| 966 | } | ||
| 853 | gettimeofday(&tv_even, (struct timezone *)NULL); | 967 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 854 | 968 | ||
| 855 | while (1) { | 969 | while (1) { |
| 856 | if (verify_num_cpus()) { | 970 | if (for_all_proc_cpus(cpu_is_not_present)) { |
| 857 | re_initialize(); | 971 | re_initialize(); |
| 858 | goto restart; | 972 | goto restart; |
| 859 | } | 973 | } |
| 860 | sleep(interval_sec); | 974 | sleep(interval_sec); |
| 861 | if (get_counters(cnt_odd)) { | 975 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
| 976 | if (retval) { | ||
| 862 | re_initialize(); | 977 | re_initialize(); |
| 863 | goto restart; | 978 | goto restart; |
| 864 | } | 979 | } |
| 865 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 980 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
| 866 | compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
| 867 | timersub(&tv_odd, &tv_even, &tv_delta); | 981 | timersub(&tv_odd, &tv_even, &tv_delta); |
| 868 | compute_average(cnt_delta, cnt_average); | 982 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
| 869 | print_counters(cnt_delta); | 983 | compute_average(EVEN_COUNTERS); |
| 984 | format_all_counters(EVEN_COUNTERS); | ||
| 985 | flush_stdout(); | ||
| 870 | sleep(interval_sec); | 986 | sleep(interval_sec); |
| 871 | if (get_counters(cnt_even)) { | 987 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
| 988 | if (retval) { | ||
| 872 | re_initialize(); | 989 | re_initialize(); |
| 873 | goto restart; | 990 | goto restart; |
| 874 | } | 991 | } |
| 875 | gettimeofday(&tv_even, (struct timezone *)NULL); | 992 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 876 | compute_delta(cnt_even, cnt_odd, cnt_delta); | ||
| 877 | timersub(&tv_even, &tv_odd, &tv_delta); | 993 | timersub(&tv_even, &tv_odd, &tv_delta); |
| 878 | compute_average(cnt_delta, cnt_average); | 994 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
| 879 | print_counters(cnt_delta); | 995 | compute_average(ODD_COUNTERS); |
| 996 | format_all_counters(ODD_COUNTERS); | ||
| 997 | flush_stdout(); | ||
| 880 | } | 998 | } |
| 881 | } | 999 | } |
| 882 | 1000 | ||
| @@ -1051,6 +1169,208 @@ int open_dev_cpu_msr(int dummy1) | |||
| 1051 | return 0; | 1169 | return 0; |
| 1052 | } | 1170 | } |
| 1053 | 1171 | ||
| 1172 | void topology_probe() | ||
| 1173 | { | ||
| 1174 | int i; | ||
| 1175 | int max_core_id = 0; | ||
| 1176 | int max_package_id = 0; | ||
| 1177 | int max_siblings = 0; | ||
| 1178 | struct cpu_topology { | ||
| 1179 | int core_id; | ||
| 1180 | int physical_package_id; | ||
| 1181 | } *cpus; | ||
| 1182 | |||
| 1183 | /* Initialize num_cpus, max_cpu_num */ | ||
| 1184 | topo.num_cpus = 0; | ||
| 1185 | topo.max_cpu_num = 0; | ||
| 1186 | for_all_proc_cpus(count_cpus); | ||
| 1187 | if (!summary_only && topo.num_cpus > 1) | ||
| 1188 | show_cpu = 1; | ||
| 1189 | |||
| 1190 | if (verbose > 1) | ||
| 1191 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | ||
| 1192 | |||
| 1193 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | ||
| 1194 | if (cpus == NULL) { | ||
| 1195 | perror("calloc cpus"); | ||
| 1196 | exit(1); | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | /* | ||
| 1200 | * Allocate and initialize cpu_present_set | ||
| 1201 | */ | ||
| 1202 | cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
| 1203 | if (cpu_present_set == NULL) { | ||
| 1204 | perror("CPU_ALLOC"); | ||
| 1205 | exit(3); | ||
| 1206 | } | ||
| 1207 | cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
| 1208 | CPU_ZERO_S(cpu_present_setsize, cpu_present_set); | ||
| 1209 | for_all_proc_cpus(mark_cpu_present); | ||
| 1210 | |||
| 1211 | /* | ||
| 1212 | * Allocate and initialize cpu_affinity_set | ||
| 1213 | */ | ||
| 1214 | cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); | ||
| 1215 | if (cpu_affinity_set == NULL) { | ||
| 1216 | perror("CPU_ALLOC"); | ||
| 1217 | exit(3); | ||
| 1218 | } | ||
| 1219 | cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); | ||
| 1220 | CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); | ||
| 1221 | |||
| 1222 | |||
| 1223 | /* | ||
| 1224 | * For online cpus | ||
| 1225 | * find max_core_id, max_package_id | ||
| 1226 | */ | ||
| 1227 | for (i = 0; i <= topo.max_cpu_num; ++i) { | ||
| 1228 | int siblings; | ||
| 1229 | |||
| 1230 | if (cpu_is_not_present(i)) { | ||
| 1231 | if (verbose > 1) | ||
| 1232 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | ||
| 1233 | continue; | ||
| 1234 | } | ||
| 1235 | cpus[i].core_id = get_core_id(i); | ||
| 1236 | if (cpus[i].core_id > max_core_id) | ||
| 1237 | max_core_id = cpus[i].core_id; | ||
| 1238 | |||
| 1239 | cpus[i].physical_package_id = get_physical_package_id(i); | ||
| 1240 | if (cpus[i].physical_package_id > max_package_id) | ||
| 1241 | max_package_id = cpus[i].physical_package_id; | ||
| 1242 | |||
| 1243 | siblings = get_num_ht_siblings(i); | ||
| 1244 | if (siblings > max_siblings) | ||
| 1245 | max_siblings = siblings; | ||
| 1246 | if (verbose > 1) | ||
| 1247 | fprintf(stderr, "cpu %d pkg %d core %d\n", | ||
| 1248 | i, cpus[i].physical_package_id, cpus[i].core_id); | ||
| 1249 | } | ||
| 1250 | topo.num_cores_per_pkg = max_core_id + 1; | ||
| 1251 | if (verbose > 1) | ||
| 1252 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | ||
| 1253 | max_core_id, topo.num_cores_per_pkg); | ||
| 1254 | if (!summary_only && topo.num_cores_per_pkg > 1) | ||
| 1255 | show_core = 1; | ||
| 1256 | |||
| 1257 | topo.num_packages = max_package_id + 1; | ||
| 1258 | if (verbose > 1) | ||
| 1259 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | ||
| 1260 | max_package_id, topo.num_packages); | ||
| 1261 | if (!summary_only && topo.num_packages > 1) | ||
| 1262 | show_pkg = 1; | ||
| 1263 | |||
| 1264 | topo.num_threads_per_core = max_siblings; | ||
| 1265 | if (verbose > 1) | ||
| 1266 | fprintf(stderr, "max_siblings %d\n", max_siblings); | ||
| 1267 | |||
| 1268 | free(cpus); | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | void | ||
| 1272 | allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) | ||
| 1273 | { | ||
| 1274 | int i; | ||
| 1275 | |||
| 1276 | *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * | ||
| 1277 | topo.num_packages, sizeof(struct thread_data)); | ||
| 1278 | if (*t == NULL) | ||
| 1279 | goto error; | ||
| 1280 | |||
| 1281 | for (i = 0; i < topo.num_threads_per_core * | ||
| 1282 | topo.num_cores_per_pkg * topo.num_packages; i++) | ||
| 1283 | (*t)[i].cpu_id = -1; | ||
| 1284 | |||
| 1285 | *c = calloc(topo.num_cores_per_pkg * topo.num_packages, | ||
| 1286 | sizeof(struct core_data)); | ||
| 1287 | if (*c == NULL) | ||
| 1288 | goto error; | ||
| 1289 | |||
| 1290 | for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) | ||
| 1291 | (*c)[i].core_id = -1; | ||
| 1292 | |||
| 1293 | *p = calloc(topo.num_packages, sizeof(struct pkg_data)); | ||
| 1294 | if (*p == NULL) | ||
| 1295 | goto error; | ||
| 1296 | |||
| 1297 | for (i = 0; i < topo.num_packages; i++) | ||
| 1298 | (*p)[i].package_id = i; | ||
| 1299 | |||
| 1300 | return; | ||
| 1301 | error: | ||
| 1302 | perror("calloc counters"); | ||
| 1303 | exit(1); | ||
| 1304 | } | ||
| 1305 | /* | ||
| 1306 | * init_counter() | ||
| 1307 | * | ||
| 1308 | * set cpu_id, core_num, pkg_num | ||
| 1309 | * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE | ||
| 1310 | * | ||
| 1311 | * increment topo.num_cores when 1st core in pkg seen | ||
| 1312 | */ | ||
| 1313 | void init_counter(struct thread_data *thread_base, struct core_data *core_base, | ||
| 1314 | struct pkg_data *pkg_base, int thread_num, int core_num, | ||
| 1315 | int pkg_num, int cpu_id) | ||
| 1316 | { | ||
| 1317 | struct thread_data *t; | ||
| 1318 | struct core_data *c; | ||
| 1319 | struct pkg_data *p; | ||
| 1320 | |||
| 1321 | t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); | ||
| 1322 | c = GET_CORE(core_base, core_num, pkg_num); | ||
| 1323 | p = GET_PKG(pkg_base, pkg_num); | ||
| 1324 | |||
| 1325 | t->cpu_id = cpu_id; | ||
| 1326 | if (thread_num == 0) { | ||
| 1327 | t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; | ||
| 1328 | if (cpu_is_first_core_in_package(cpu_id)) | ||
| 1329 | t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | c->core_id = core_num; | ||
| 1333 | p->package_id = pkg_num; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | |||
| 1337 | int initialize_counters(int cpu_id) | ||
| 1338 | { | ||
| 1339 | int my_thread_id, my_core_id, my_package_id; | ||
| 1340 | |||
| 1341 | my_package_id = get_physical_package_id(cpu_id); | ||
| 1342 | my_core_id = get_core_id(cpu_id); | ||
| 1343 | |||
| 1344 | if (cpu_is_first_sibling_in_core(cpu_id)) { | ||
| 1345 | my_thread_id = 0; | ||
| 1346 | topo.num_cores++; | ||
| 1347 | } else { | ||
| 1348 | my_thread_id = 1; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
| 1352 | init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); | ||
| 1353 | return 0; | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | void allocate_output_buffer() | ||
| 1357 | { | ||
| 1358 | output_buffer = calloc(1, (1 + topo.num_cpus) * 128); | ||
| 1359 | outp = output_buffer; | ||
| 1360 | if (outp == NULL) { | ||
| 1361 | perror("calloc"); | ||
| 1362 | exit(-1); | ||
| 1363 | } | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | void setup_all_buffers(void) | ||
| 1367 | { | ||
| 1368 | topology_probe(); | ||
| 1369 | allocate_counters(&thread_even, &core_even, &package_even); | ||
| 1370 | allocate_counters(&thread_odd, &core_odd, &package_odd); | ||
| 1371 | allocate_output_buffer(); | ||
| 1372 | for_all_proc_cpus(initialize_counters); | ||
| 1373 | } | ||
| 1054 | void turbostat_init() | 1374 | void turbostat_init() |
| 1055 | { | 1375 | { |
| 1056 | check_cpuid(); | 1376 | check_cpuid(); |
| @@ -1058,21 +1378,19 @@ void turbostat_init() | |||
| 1058 | check_dev_msr(); | 1378 | check_dev_msr(); |
| 1059 | check_super_user(); | 1379 | check_super_user(); |
| 1060 | 1380 | ||
| 1061 | num_cpus = for_all_cpus(alloc_new_counters); | 1381 | setup_all_buffers(); |
| 1062 | cpu_mask_init(num_cpus); | ||
| 1063 | 1382 | ||
| 1064 | if (verbose) | 1383 | if (verbose) |
| 1065 | print_nehalem_info(); | 1384 | print_verbose_header(); |
| 1066 | } | 1385 | } |
| 1067 | 1386 | ||
| 1068 | int fork_it(char **argv) | 1387 | int fork_it(char **argv) |
| 1069 | { | 1388 | { |
| 1070 | int retval; | ||
| 1071 | pid_t child_pid; | 1389 | pid_t child_pid; |
| 1072 | get_counters(cnt_even); | ||
| 1073 | 1390 | ||
| 1074 | /* clear affinity side-effect of get_counters() */ | 1391 | for_all_cpus(get_counters, EVEN_COUNTERS); |
| 1075 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | 1392 | /* clear affinity side-effect of get_counters() */ |
| 1393 | sched_setaffinity(0, cpu_present_setsize, cpu_present_set); | ||
| 1076 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1394 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 1077 | 1395 | ||
| 1078 | child_pid = fork(); | 1396 | child_pid = fork(); |
| @@ -1095,14 +1413,17 @@ int fork_it(char **argv) | |||
| 1095 | exit(1); | 1413 | exit(1); |
| 1096 | } | 1414 | } |
| 1097 | } | 1415 | } |
| 1098 | get_counters(cnt_odd); | 1416 | /* |
| 1417 | * n.b. fork_it() does not check for errors from for_all_cpus() | ||
| 1418 | * because re-starting is problematic when forking | ||
| 1419 | */ | ||
| 1420 | for_all_cpus(get_counters, ODD_COUNTERS); | ||
| 1099 | gettimeofday(&tv_odd, (struct timezone *)NULL); | 1421 | gettimeofday(&tv_odd, (struct timezone *)NULL); |
| 1100 | retval = compute_delta(cnt_odd, cnt_even, cnt_delta); | ||
| 1101 | |||
| 1102 | timersub(&tv_odd, &tv_even, &tv_delta); | 1422 | timersub(&tv_odd, &tv_even, &tv_delta); |
| 1103 | compute_average(cnt_delta, cnt_average); | 1423 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
| 1104 | if (!retval) | 1424 | compute_average(EVEN_COUNTERS); |
| 1105 | print_counters(cnt_delta); | 1425 | format_all_counters(EVEN_COUNTERS); |
| 1426 | flush_stderr(); | ||
| 1106 | 1427 | ||
| 1107 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 1428 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
| 1108 | 1429 | ||
| @@ -1115,8 +1436,14 @@ void cmdline(int argc, char **argv) | |||
| 1115 | 1436 | ||
| 1116 | progname = argv[0]; | 1437 | progname = argv[0]; |
| 1117 | 1438 | ||
| 1118 | while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { | 1439 | while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { |
| 1119 | switch (opt) { | 1440 | switch (opt) { |
| 1441 | case 'c': | ||
| 1442 | show_core_only++; | ||
| 1443 | break; | ||
| 1444 | case 'p': | ||
| 1445 | show_pkg_only++; | ||
| 1446 | break; | ||
| 1120 | case 's': | 1447 | case 's': |
| 1121 | summary_only++; | 1448 | summary_only++; |
| 1122 | break; | 1449 | break; |
| @@ -1142,10 +1469,8 @@ int main(int argc, char **argv) | |||
| 1142 | cmdline(argc, argv); | 1469 | cmdline(argc, argv); |
| 1143 | 1470 | ||
| 1144 | if (verbose > 1) | 1471 | if (verbose > 1) |
| 1145 | fprintf(stderr, "turbostat Dec 6, 2010" | 1472 | fprintf(stderr, "turbostat v2.0 May 16, 2012" |
| 1146 | " - Len Brown <lenb@kernel.org>\n"); | 1473 | " - Len Brown <lenb@kernel.org>\n"); |
| 1147 | if (verbose > 1) | ||
| 1148 | fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); | ||
| 1149 | 1474 | ||
| 1150 | turbostat_init(); | 1475 | turbostat_init(); |
| 1151 | 1476 | ||
