author:    Ingo Molnar <mingo@elte.hu>  2008-12-13 03:00:03 -0500
committer: Ingo Molnar <mingo@elte.hu>  2008-12-14 14:30:48 -0500
commit:    ee06094f8279e1312fc0a31591320cc7b6f0ab1e
tree:      aecf8f2177b2398e4db8df68a9705009b31a8ef7 /arch
parent:    9b194e831fb2c322ed81a373e49620f34edc2778
perfcounters: restructure x86 counter math
Impact: restructure code
Change counter math from absolute values to clear delta logic.
We try to extract elapsed deltas from the raw hw counter - and put
them into the generic counter.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--   arch/x86/Kconfig                     |   2
-rw-r--r--   arch/x86/kernel/cpu/perf_counter.c   | 230
2 files changed, 125 insertions, 107 deletions
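Before the hunks, here is a minimal, self-contained user-space C sketch of the delta logic this commit introduces: keep a per-counter "previous raw count", atomically swap in the newest raw reading, and accumulate only the 32-bit-clipped difference. It mirrors the pattern of the new x86_perf_counter_update(), but the struct, the read_raw_counter() helper and the C11 atomics are illustrative stand-ins, not kernel APIs.

/*
 * Sketch of the delta-update idea: the names and the simulated
 * hardware read are hypothetical; only the pattern matches the patch.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct sketch_counter {
        _Atomic uint64_t prev_raw;      /* last raw value we accounted for */
        _Atomic uint64_t count;         /* accumulated event total */
};

/* Stand-in for rdmsrl(): pretend the hardware counter keeps advancing. */
static uint64_t read_raw_counter(void)
{
        static uint64_t fake_hw;
        return fake_hw += 1000;         /* arbitrary forward progress */
}

static void sketch_counter_update(struct sketch_counter *c)
{
        uint64_t prev, now;

        /*
         * Retry until prev_raw is unchanged between the read and the
         * compare-and-swap; in the kernel an NMI handler could have
         * updated it underneath us.
         */
        do {
                prev = atomic_load(&c->prev_raw);
                now = read_raw_counter();
        } while (!atomic_compare_exchange_weak(&c->prev_raw, &prev, now));

        /* Clip to 32 bits: narrow hardware counters do not sign-extend. */
        uint64_t delta = (uint32_t)((int32_t)now - (int32_t)prev);

        atomic_fetch_add(&c->count, delta);
}

int main(void)
{
        struct sketch_counter c = { 0 };

        for (int i = 0; i < 3; i++) {
                sketch_counter_update(&c);
                printf("total events: %llu\n",
                       (unsigned long long)atomic_load(&c.count));
        }
        return 0;
}

The compare-and-exchange loop is what makes the update NMI-safe in the kernel version: if an interrupt updates the previous count between the read and the swap, the swap fails and the whole read is retried.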
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2fdc1867241..fe94490bab61 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,7 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
         def_bool y
         depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
-        select HAVE_PERF_COUNTERS
+        select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
         def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b903f8df72bb..5afae13d8d59 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,48 @@ const int intel_perfmon_event_map[] =
 const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 
 /*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static void
+x86_perf_counter_update(struct perf_counter *counter,
+                        struct hw_perf_counter *hwc, int idx)
+{
+        u64 prev_raw_count, new_raw_count, delta;
+
+        WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
+        /*
+         * Careful: an NMI might modify the previous counter value.
+         *
+         * Our tactic to handle this is to first atomically read and
+         * exchange a new raw count - then add that new-prev delta
+         * count to the generic counter atomically:
+         */
+again:
+        prev_raw_count = atomic64_read(&hwc->prev_count);
+        rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                        new_raw_count) != prev_raw_count)
+                goto again;
+
+        /*
+         * Now we have the new raw value and have updated the prev
+         * timestamp already. We can now calculate the elapsed delta
+         * (counter-)time and add that to the generic counter.
+         *
+         * Careful, not all hw sign-extends above the physical width
+         * of the count, so we do that by clipping the delta to 32 bits:
+         */
+        delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+        WARN_ON_ONCE((int)delta < 0);
+
+        atomic64_add(delta, &counter->count);
+        atomic64_sub(delta, &hwc->period_left);
+}
+
+/*
  * Setup the hardware configuration for a given hw_event_type
  */
 static int __hw_perf_counter_init(struct perf_counter *counter)
@@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
          * so we install an artificial 1<<31 period regardless of
          * the generic counter period:
          */
-        if (!hwc->irq_period)
+        if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
                 hwc->irq_period = 0x7FFFFFFF;
 
-        hwc->next_count = -(s32)hwc->irq_period;
+        atomic64_set(&hwc->period_left, hwc->irq_period);
 
         /*
          * Raw event type provide the config in the event structure
@@ -118,12 +160,6 @@ void hw_perf_enable_all(void)
         wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore(u64 ctrl)
-{
-        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
-}
-EXPORT_SYMBOL_GPL(hw_perf_restore);
-
 u64 hw_perf_save_disable(void)
 {
         u64 ctrl;
@@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void)
 }
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
+void hw_perf_restore(u64 ctrl)
+{
+        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore);
+
 static inline void
-__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct perf_counter *counter,
+                           struct hw_perf_counter *hwc, unsigned int idx)
 {
-        wrmsr(hwc->config_base + idx, hwc->config, 0);
+        int err;
+
+        err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+        WARN_ON_ONCE(err);
 }
 
-static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
 
-static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static void
+__hw_perf_counter_set_period(struct perf_counter *counter,
+                             struct hw_perf_counter *hwc, int idx)
 {
-        per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+        s32 left = atomic64_read(&hwc->period_left);
+        s32 period = hwc->irq_period;
+
+        WARN_ON_ONCE(period <= 0);
+
+        /*
+         * If we are way outside a reasoable range then just skip forward:
+         */
+        if (unlikely(left <= -period)) {
+                left = period;
+                atomic64_set(&hwc->period_left, left);
+        }
+
+        if (unlikely(left <= 0)) {
+                left += period;
+                atomic64_set(&hwc->period_left, left);
+        }
 
-        wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+        WARN_ON_ONCE(left <= 0);
+
+        per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+        /*
+         * The hw counter starts counting from this counter offset,
+         * mark it to be able to extra future deltas:
+         */
+        atomic64_set(&hwc->prev_count, (u64)(s64)-left);
+
+        wrmsr(hwc->counter_base + idx, -left, 0);
 }
 
-static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void
+__x86_perf_counter_enable(struct perf_counter *counter,
+                          struct hw_perf_counter *hwc, int idx)
 {
         wrmsr(hwc->config_base + idx,
               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
 static void x86_perf_counter_enable(struct perf_counter *counter)
 {
         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
         perf_counters_lapic_init(hwc->nmi);
 
-        __x86_perf_counter_disable(hwc, idx);
+        __x86_perf_counter_disable(counter, hwc, idx);
 
         cpuc->counters[idx] = counter;
 
-        __hw_perf_counter_set_period(hwc, idx);
-        __x86_perf_counter_enable(hwc, idx);
-}
-
-static void __hw_perf_save_counter(struct perf_counter *counter,
-                                   struct hw_perf_counter *hwc, int idx)
-{
-        s64 raw = -1;
-        s64 delta;
-
-        /*
-         * Get the raw hw counter value:
-         */
-        rdmsrl(hwc->counter_base + idx, raw);
-
-        /*
-         * Rebase it to zero (it started counting at -irq_period),
-         * to see the delta since ->prev_count:
-         */
-        delta = (s64)hwc->irq_period + (s64)(s32)raw;
-
-        atomic64_counter_set(counter, hwc->prev_count + delta);
-
-        /*
-         * Adjust the ->prev_count offset - if we went beyond
-         * irq_period of units, then we got an IRQ and the counter
-         * was set back to -irq_period:
-         */
-        while (delta >= (s64)hwc->irq_period) {
-                hwc->prev_count += hwc->irq_period;
-                delta -= (s64)hwc->irq_period;
-        }
-
-        /*
-         * Calculate the next raw counter value we'll write into
-         * the counter at the next sched-in time:
-         */
-        delta -= (s64)hwc->irq_period;
-
-        hwc->next_count = (s32)delta;
+        __hw_perf_counter_set_period(counter, hwc, idx);
+        __x86_perf_counter_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
 {
-        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
         int cpu, idx;
 
         if (!nr_hw_counters)
@@ -241,14 +286,14 @@ void perf_counter_print_debug(void)
                 rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
                 rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count);
 
-                next_count = per_cpu(prev_next_count[idx], cpu);
+                prev_left = per_cpu(prev_left[idx], cpu);
 
                 printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n",
                         cpu, idx, pmc_ctrl);
                 printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
                         cpu, idx, pmc_count);
-                printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n",
-                        cpu, idx, next_count);
+                printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n",
+                        cpu, idx, prev_left);
         }
         local_irq_enable();
 }
@@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter)
         struct hw_perf_counter *hwc = &counter->hw;
         unsigned int idx = hwc->idx;
 
-        __x86_perf_counter_disable(hwc, idx);
+        __x86_perf_counter_disable(counter, hwc, idx);
 
         clear_bit(idx, cpuc->used);
         cpuc->counters[idx] = NULL;
-        __hw_perf_save_counter(counter, hwc, idx);
-}
 
-static void x86_perf_counter_read(struct perf_counter *counter)
-{
-        struct hw_perf_counter *hwc = &counter->hw;
-        unsigned long addr = hwc->counter_base + hwc->idx;
-        s64 offs, val = -1LL;
-        s32 val32;
-
-        /* Careful: NMI might modify the counter offset */
-        do {
-                offs = hwc->prev_count;
-                rdmsrl(addr, val);
-        } while (offs != hwc->prev_count);
-
-        val32 = (s32) val;
-        val = (s64)hwc->irq_period + (s64)val32;
-        atomic64_counter_set(counter, hwc->prev_count + val);
+        /*
+         * Drain the remaining delta count out of a counter
+         * that we are disabling:
+         */
+        x86_perf_counter_update(counter, hwc, idx);
 }
 
 static void perf_store_irq_data(struct perf_counter *counter, u64 data)
@@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data)
 }
 
 /*
- * NMI-safe enable method:
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
  */
 static void perf_save_and_restart(struct perf_counter *counter)
 {
@@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter)
 
         rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
-        __hw_perf_save_counter(counter, hwc, idx);
-        __hw_perf_counter_set_period(hwc, idx);
+        x86_perf_counter_update(counter, hwc, idx);
+        __hw_perf_counter_set_period(counter, hwc, idx);
 
         if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-                __x86_perf_counter_enable(hwc, idx);
+                __x86_perf_counter_enable(counter, hwc, idx);
 }
 
 static void
 perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
         struct perf_counter *counter, *group_leader = sibling->group_leader;
-        int bit;
-
-        /*
-         * Store the counter's own timestamp first:
-         */
-        perf_store_irq_data(sibling, sibling->hw_event.type);
-        perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
         /*
-         * Then store sibling timestamps (if any):
+         * Store sibling timestamps (if any):
          */
         list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-                if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-                        /*
-                         * When counter was not in the overflow mask, we have to
-                         * read it from hardware. We read it as well, when it
-                         * has not been read yet and clear the bit in the
-                         * status mask.
-                         */
-                        bit = counter->hw.idx;
-                        if (!test_bit(bit, (unsigned long *) overflown) ||
-                            test_bit(bit, (unsigned long *) status)) {
-                                clear_bit(bit, (unsigned long *) status);
-                                perf_save_and_restart(counter);
-                        }
-                }
+                x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
                 perf_store_irq_data(sibling, counter->hw_event.type);
-                perf_store_irq_data(sibling, atomic64_counter_read(counter));
+                perf_store_irq_data(sibling, atomic64_read(&counter->count));
         }
 }
 
@@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void)
         perf_counters_initialized = true;
 }
 
+static void x86_perf_counter_read(struct perf_counter *counter)
+{
+        x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
         .hw_perf_counter_enable         = x86_perf_counter_enable,
         .hw_perf_counter_disable        = x86_perf_counter_disable,