diff options
author | Thomas Renninger <trenn@suse.de> | 2011-08-11 19:11:35 -0400 |
---|---|---|
committer | Dominik Brodowski <linux@dominikbrodowski.net> | 2011-08-15 14:02:59 -0400 |
commit | 2dfc818b35cbea59188cc86e86e0a0efce2b0dbe (patch) | |
tree | f68c00903c323c3b837f830fa758ac5b55e01c24 /tools | |
parent | 75f25bd31d9315ab57e4fb5eba3340452febc48d (diff) |
cpupower: mperf monitor - Use TSC to calculate max frequency if possible
This makes the implementation independent of cpufreq drivers.
It therefore also works on a Xen kernel, where the hypervisor
does the frequency switching and idle-state entry.
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/cpupower/Makefile | 2 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 177 |
2 files changed, 131 insertions, 48 deletions
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 94c2cf0a98b8..11521d2f0a4c 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile | |||
@@ -24,7 +24,7 @@ | |||
24 | 24 | ||
25 | # Set the following to `true' to make a unstripped, unoptimized | 25 | # Set the following to `true' to make a unstripped, unoptimized |
26 | # binary. Leave this set to `false' for production use. | 26 | # binary. Leave this set to `false' for production use. |
27 | DEBUG ?= false | 27 | DEBUG ?= true |
28 | 28 | ||
29 | # make the build silent. Set this to something else to make it noisy again. | 29 | # make the build silent. Set this to something else to make it noisy again. |
30 | V ?= false | 30 | V ?= false |
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 63ca87a05e5f..5650ab5a2c20 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | |||
@@ -22,12 +22,15 @@ | |||
22 | 22 | ||
23 | #define MSR_TSC 0x10 | 23 | #define MSR_TSC 0x10 |
24 | 24 | ||
25 | #define MSR_AMD_HWCR 0xc0010015 | ||
26 | |||
25 | enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; | 27 | enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; |
26 | 28 | ||
27 | static int mperf_get_count_percent(unsigned int self_id, double *percent, | 29 | static int mperf_get_count_percent(unsigned int self_id, double *percent, |
28 | unsigned int cpu); | 30 | unsigned int cpu); |
29 | static int mperf_get_count_freq(unsigned int id, unsigned long long *count, | 31 | static int mperf_get_count_freq(unsigned int id, unsigned long long *count, |
30 | unsigned int cpu); | 32 | unsigned int cpu); |
33 | static struct timespec time_start, time_end; | ||
31 | 34 | ||
32 | static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { | 35 | static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { |
33 | { | 36 | { |
@@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { | |||
54 | }, | 57 | }, |
55 | }; | 58 | }; |
56 | 59 | ||
60 | enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF }; | ||
61 | static int max_freq_mode; | ||
62 | /* | ||
63 | * The max frequency mperf is ticking at (in C0), either retrieved via: | ||
64 | * 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency | ||
65 | * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time | ||
66 | * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen) | ||
67 | */ | ||
68 | static unsigned long max_frequency; | ||
69 | |||
57 | static unsigned long long tsc_at_measure_start; | 70 | static unsigned long long tsc_at_measure_start; |
58 | static unsigned long long tsc_at_measure_end; | 71 | static unsigned long long tsc_at_measure_end; |
59 | static unsigned long max_frequency; | ||
60 | static unsigned long long *mperf_previous_count; | 72 | static unsigned long long *mperf_previous_count; |
61 | static unsigned long long *aperf_previous_count; | 73 | static unsigned long long *aperf_previous_count; |
62 | static unsigned long long *mperf_current_count; | 74 | static unsigned long long *mperf_current_count; |
63 | static unsigned long long *aperf_current_count; | 75 | static unsigned long long *aperf_current_count; |
76 | |||
64 | /* valid flag for all CPUs. If a MSR read failed it will be zero */ | 77 | /* valid flag for all CPUs. If a MSR read failed it will be zero */ |
65 | static int *is_valid; | 78 | static int *is_valid; |
66 | 79 | ||
67 | static int mperf_get_tsc(unsigned long long *tsc) | 80 | static int mperf_get_tsc(unsigned long long *tsc) |
68 | { | 81 | { |
69 | return read_msr(0, MSR_TSC, tsc); | 82 | int ret; |
83 | ret = read_msr(0, MSR_TSC, tsc); | ||
84 | if (ret) | ||
85 | dprint("Reading TSC MSR failed, returning %llu\n", *tsc); | ||
86 | return ret; | ||
70 | } | 87 | } |
71 | 88 | ||
72 | static int mperf_init_stats(unsigned int cpu) | 89 | static int mperf_init_stats(unsigned int cpu) |
@@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu) | |||
97 | return 0; | 114 | return 0; |
98 | } | 115 | } |
99 | 116 | ||
100 | /* | ||
101 | * get_average_perf() | ||
102 | * | ||
103 | * Returns the average performance (also considers boosted frequencies) | ||
104 | * | ||
105 | * Input: | ||
106 | * aperf_diff: Difference of the aperf register over a time period | ||
107 | * mperf_diff: Difference of the mperf register over the same time period | ||
108 | * max_freq: Maximum frequency (P0) | ||
109 | * | ||
110 | * Returns: | ||
111 | * Average performance over the time period | ||
112 | */ | ||
113 | static unsigned long get_average_perf(unsigned long long aperf_diff, | ||
114 | unsigned long long mperf_diff) | ||
115 | { | ||
116 | unsigned int perf_percent = 0; | ||
117 | if (((unsigned long)(-1) / 100) < aperf_diff) { | ||
118 | int shift_count = 7; | ||
119 | aperf_diff >>= shift_count; | ||
120 | mperf_diff >>= shift_count; | ||
121 | } | ||
122 | perf_percent = (aperf_diff * 100) / mperf_diff; | ||
123 | return (max_frequency * perf_percent) / 100; | ||
124 | } | ||
125 | |||
126 | static int mperf_get_count_percent(unsigned int id, double *percent, | 117 | static int mperf_get_count_percent(unsigned int id, double *percent, |
127 | unsigned int cpu) | 118 | unsigned int cpu) |
128 | { | 119 | { |
129 | unsigned long long aperf_diff, mperf_diff, tsc_diff; | 120 | unsigned long long aperf_diff, mperf_diff, tsc_diff; |
121 | unsigned long long timediff; | ||
130 | 122 | ||
131 | if (!is_valid[cpu]) | 123 | if (!is_valid[cpu]) |
132 | return -1; | 124 | return -1; |
@@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent, | |||
136 | 128 | ||
137 | mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; | 129 | mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; |
138 | aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; | 130 | aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; |
139 | tsc_diff = tsc_at_measure_end - tsc_at_measure_start; | ||
140 | 131 | ||
141 | *percent = 100.0 * mperf_diff / tsc_diff; | 132 | if (max_freq_mode == MAX_FREQ_TSC_REF) { |
142 | dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n", | 133 | tsc_diff = tsc_at_measure_end - tsc_at_measure_start; |
143 | mperf_cstates[id].name, mperf_diff, tsc_diff); | 134 | *percent = 100.0 * mperf_diff / tsc_diff; |
135 | dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", | ||
136 | mperf_cstates[id].name, mperf_diff, tsc_diff); | ||
137 | } else if (max_freq_mode == MAX_FREQ_SYSFS) { | ||
138 | timediff = timespec_diff_us(time_start, time_end); | ||
139 | *percent = 100.0 * mperf_diff / timediff; | ||
140 | dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", | ||
141 | mperf_cstates[id].name, mperf_diff, timediff); | ||
142 | } else | ||
143 | return -1; | ||
144 | 144 | ||
145 | if (id == Cx) | 145 | if (id == Cx) |
146 | *percent = 100.0 - *percent; | 146 | *percent = 100.0 - *percent; |
@@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, | |||
154 | static int mperf_get_count_freq(unsigned int id, unsigned long long *count, | 154 | static int mperf_get_count_freq(unsigned int id, unsigned long long *count, |
155 | unsigned int cpu) | 155 | unsigned int cpu) |
156 | { | 156 | { |
157 | unsigned long long aperf_diff, mperf_diff; | 157 | unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff; |
158 | 158 | ||
159 | if (id != AVG_FREQ) | 159 | if (id != AVG_FREQ) |
160 | return 1; | 160 | return 1; |
@@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, | |||
165 | mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; | 165 | mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; |
166 | aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; | 166 | aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; |
167 | 167 | ||
168 | /* Return MHz for now, might want to return KHz if column width is more | 168 | if (max_freq_mode == MAX_FREQ_TSC_REF) { |
169 | generic */ | 169 | /* Calculate max_freq from TSC count */ |
170 | *count = get_average_perf(aperf_diff, mperf_diff) / 1000; | 170 | tsc_diff = tsc_at_measure_end - tsc_at_measure_start; |
171 | dprint("%s: %llu\n", mperf_cstates[id].name, *count); | 171 | time_diff = timespec_diff_us(time_start, time_end); |
172 | max_frequency = tsc_diff / time_diff; | ||
173 | } | ||
172 | 174 | ||
175 | *count = max_frequency * ((double)aperf_diff / mperf_diff); | ||
176 | dprint("%s: Average freq based on %s maximum frequency:\n", | ||
177 | mperf_cstates[id].name, | ||
178 | (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read"); | ||
179 | dprint("%max_frequency: %lu", max_frequency); | ||
180 | dprint("aperf_diff: %llu\n", aperf_diff); | ||
181 | dprint("mperf_diff: %llu\n", mperf_diff); | ||
182 | dprint("avg freq: %llu\n", *count); | ||
173 | return 0; | 183 | return 0; |
174 | } | 184 | } |
175 | 185 | ||
@@ -178,6 +188,7 @@ static int mperf_start(void) | |||
178 | int cpu; | 188 | int cpu; |
179 | unsigned long long dbg; | 189 | unsigned long long dbg; |
180 | 190 | ||
191 | clock_gettime(CLOCK_REALTIME, &time_start); | ||
181 | mperf_get_tsc(&tsc_at_measure_start); | 192 | mperf_get_tsc(&tsc_at_measure_start); |
182 | 193 | ||
183 | for (cpu = 0; cpu < cpu_count; cpu++) | 194 | for (cpu = 0; cpu < cpu_count; cpu++) |
@@ -193,32 +204,104 @@ static int mperf_stop(void) | |||
193 | unsigned long long dbg; | 204 | unsigned long long dbg; |
194 | int cpu; | 205 | int cpu; |
195 | 206 | ||
196 | mperf_get_tsc(&tsc_at_measure_end); | ||
197 | |||
198 | for (cpu = 0; cpu < cpu_count; cpu++) | 207 | for (cpu = 0; cpu < cpu_count; cpu++) |
199 | mperf_measure_stats(cpu); | 208 | mperf_measure_stats(cpu); |
200 | 209 | ||
210 | mperf_get_tsc(&tsc_at_measure_end); | ||
211 | clock_gettime(CLOCK_REALTIME, &time_end); | ||
212 | |||
201 | mperf_get_tsc(&dbg); | 213 | mperf_get_tsc(&dbg); |
202 | dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); | 214 | dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); |
203 | 215 | ||
204 | return 0; | 216 | return 0; |
205 | } | 217 | } |
206 | 218 | ||
207 | struct cpuidle_monitor mperf_monitor; | 219 | /* |
208 | 220 | * Mperf register is defined to tick at P0 (maximum) frequency | |
209 | struct cpuidle_monitor *mperf_register(void) | 221 | * |
222 | * Instead of reading out P0 which can be tricky to read out from HW, | ||
223 | * we use TSC counter if it reliably ticks at P0/mperf frequency. | ||
224 | * | ||
225 | * Still try to fall back to: | ||
226 | * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq | ||
227 | * on older Intel HW without invariant TSC feature. | ||
228 | * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but | ||
229 | * it's still double checked (MSR_AMD_HWCR)). | ||
230 | * | ||
231 | * On these machines the user would still get useful mperf | ||
232 | * stats when acpi-cpufreq driver is loaded. | ||
233 | */ | ||
234 | static int init_maxfreq_mode(void) | ||
210 | { | 235 | { |
236 | int ret; | ||
237 | unsigned long long hwcr; | ||
211 | unsigned long min; | 238 | unsigned long min; |
212 | 239 | ||
213 | if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) | 240 | if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC) |
214 | return NULL; | 241 | goto use_sysfs; |
215 | 242 | ||
216 | /* Assume min/max all the same on all cores */ | 243 | if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) { |
244 | /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf | ||
245 | * freq. | ||
246 | * A test whether hwcr is accessable/available would be: | ||
247 | * (cpupower_cpu_info.family > 0x10 || | ||
248 | * cpupower_cpu_info.family == 0x10 && | ||
249 | * cpupower_cpu_info.model >= 0x2)) | ||
250 | * This should be the case for all aperf/mperf | ||
251 | * capable AMD machines and is therefore safe to test here. | ||
252 | * Compare with Linus kernel git commit: acf01734b1747b1ec4 | ||
253 | */ | ||
254 | ret = read_msr(0, MSR_AMD_HWCR, &hwcr); | ||
255 | /* | ||
256 | * If the MSR read failed, assume a Xen system that did | ||
257 | * not explicitly provide access to it and assume TSC works | ||
258 | */ | ||
259 | if (ret != 0) { | ||
260 | dprint("TSC read 0x%x failed - assume TSC working\n", | ||
261 | MSR_AMD_HWCR); | ||
262 | return 0; | ||
263 | } else if (1 & (hwcr >> 24)) { | ||
264 | max_freq_mode = MAX_FREQ_TSC_REF; | ||
265 | return 0; | ||
266 | } else { /* Use sysfs max frequency if available */ } | ||
267 | } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) { | ||
268 | /* | ||
269 | * On Intel we assume mperf (in C0) is ticking at same | ||
270 | * rate than TSC | ||
271 | */ | ||
272 | max_freq_mode = MAX_FREQ_TSC_REF; | ||
273 | return 0; | ||
274 | } | ||
275 | use_sysfs: | ||
217 | if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { | 276 | if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { |
218 | dprint("Cannot retrieve max freq from cpufreq kernel " | 277 | dprint("Cannot retrieve max freq from cpufreq kernel " |
219 | "subsystem\n"); | 278 | "subsystem\n"); |
220 | return NULL; | 279 | return -1; |
221 | } | 280 | } |
281 | max_freq_mode = MAX_FREQ_SYSFS; | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * This monitor provides: | ||
287 | * | ||
288 | * 1) Average frequency a CPU resided in | ||
289 | * This always works if the CPU has aperf/mperf capabilities | ||
290 | * | ||
291 | * 2) C0 and Cx (any sleep state) time a CPU resided in | ||
292 | * Works if mperf timer stops ticking in sleep states which | ||
293 | * seem to be the case on all current HW. | ||
294 | * Both is directly retrieved from HW registers and is independent | ||
295 | * from kernel statistics. | ||
296 | */ | ||
297 | struct cpuidle_monitor mperf_monitor; | ||
298 | struct cpuidle_monitor *mperf_register(void) | ||
299 | { | ||
300 | if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) | ||
301 | return NULL; | ||
302 | |||
303 | if (init_maxfreq_mode()) | ||
304 | return NULL; | ||
222 | 305 | ||
223 | /* Free this at program termination */ | 306 | /* Free this at program termination */ |
224 | is_valid = calloc(cpu_count, sizeof(int)); | 307 | is_valid = calloc(cpu_count, sizeof(int)); |