author		Ingo Molnar <mingo@elte.hu>	2008-12-13 03:00:03 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-12-14 14:30:48 -0500
commit		ee06094f8279e1312fc0a31591320cc7b6f0ab1e (patch)
tree		aecf8f2177b2398e4db8df68a9705009b31a8ef7 /arch
parent		9b194e831fb2c322ed81a373e49620f34edc2778 (diff)
perfcounters: restructure x86 counter math
Impact: restructure code

Change counter math from absolute values to clear delta logic. We try to
extract elapsed deltas from the raw hw counter - and put that into the
generic counter.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
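The heart of the patch is the new x86_perf_counter_update() below: read the raw
hardware count, atomically exchange it into hwc->prev_count, and add only the
(32-bit clipped) difference to the generic counter. As a rough illustration of
that read-and-exchange idea, here is a minimal, self-contained user-space sketch
in C11; everything in it (read_raw_counter(), counter_update(), the two atomics)
is hypothetical stand-in code, not the kernel implementation:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	static _Atomic uint64_t prev_count;	/* last raw value already accounted for */
	static _Atomic uint64_t total_count;	/* accumulated "generic counter" value  */

	/* Hypothetical stand-in for the rdmsrl() raw-counter read in the real patch. */
	static uint64_t read_raw_counter(void)
	{
		static uint64_t fake;
		return fake += 1000;		/* pretend the hardware counted 1000 events */
	}

	static void counter_update(void)
	{
		uint64_t prev, new_raw, delta;

		do {
			prev    = atomic_load(&prev_count);
			new_raw = read_raw_counter();
			/* An NMI may have moved prev_count under us: retry the exchange. */
		} while (!atomic_compare_exchange_strong(&prev_count, &prev, new_raw));

		/*
		 * Not all hardware sign-extends above the physical counter width,
		 * so clip the delta to 32 bits before accumulating it.
		 */
		delta = (uint32_t)((int32_t)new_raw - (int32_t)prev);
		atomic_fetch_add(&total_count, delta);
	}

	int main(void)
	{
		counter_update();
		counter_update();
		printf("accumulated: %llu\n",
		       (unsigned long long)atomic_load(&total_count));
		return 0;	/* prints 2000: two deltas of 1000 each */
	}

The retry loop mirrors the "again:" label in the patch: if a concurrent update
changes prev_count between the read and the exchange, the compare-and-exchange
fails and the whole update is redone with fresh values.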
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/Kconfig			|   2
-rw-r--r--	arch/x86/kernel/cpu/perf_counter.c	| 230
2 files changed, 125 insertions(+), 107 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2fdc1867241..fe94490bab61 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,7 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
 	def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b903f8df72bb..5afae13d8d59 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,48 @@ const int intel_perfmon_event_map[] =
 const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 
 /*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static void
+x86_perf_counter_update(struct perf_counter *counter,
+			struct hw_perf_counter *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count, delta;
+
+	WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
+	/*
+	 * Careful: an NMI might modify the previous counter value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically:
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count, so we do that by clipping the delta to 32 bits:
+	 */
+	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+	WARN_ON_ONCE((int)delta < 0);
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
+/*
  * Setup the hardware configuration for a given hw_event_type
  */
 static int __hw_perf_counter_init(struct perf_counter *counter)
@@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * so we install an artificial 1<<31 period regardless of
 	 * the generic counter period:
 	 */
-	if (!hwc->irq_period)
+	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count = -(s32)hwc->irq_period;
+	atomic64_set(&hwc->period_left, hwc->irq_period);
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -118,12 +160,6 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore(u64 ctrl)
-{
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
-}
-EXPORT_SYMBOL_GPL(hw_perf_restore);
-
 u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
@@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void)
 }
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
+void hw_perf_restore(u64 ctrl)
+{
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore);
+
 static inline void
-__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct perf_counter *counter,
+			   struct hw_perf_counter *hwc, unsigned int idx)
 {
-	wrmsr(hwc->config_base + idx, hwc->config, 0);
+	int err;
+
+	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+	WARN_ON_ONCE(err);
 }
 
-static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
 
-static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static void
+__hw_perf_counter_set_period(struct perf_counter *counter,
+			     struct hw_perf_counter *hwc, int idx)
 {
-	per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+	s32 left = atomic64_read(&hwc->period_left);
+	s32 period = hwc->irq_period;
+
+	WARN_ON_ONCE(period <= 0);
+
+	/*
+	 * If we are way outside a reasoable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+	}
 
-	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+	WARN_ON_ONCE(left <= 0);
+
+	per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw counter starts counting from this counter offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)(s64)-left);
+
+	wrmsr(hwc->counter_base + idx, -left, 0);
 }
 
-static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void
+__x86_perf_counter_enable(struct perf_counter *counter,
+			  struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
 		hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
 static void x86_perf_counter_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
 
-	__hw_perf_counter_set_period(hwc, idx);
-	__x86_perf_counter_enable(hwc, idx);
-}
-
-static void __hw_perf_save_counter(struct perf_counter *counter,
-				   struct hw_perf_counter *hwc, int idx)
-{
-	s64 raw = -1;
-	s64 delta;
-
-	/*
-	 * Get the raw hw counter value:
-	 */
-	rdmsrl(hwc->counter_base + idx, raw);
-
-	/*
-	 * Rebase it to zero (it started counting at -irq_period),
-	 * to see the delta since ->prev_count:
-	 */
-	delta = (s64)hwc->irq_period + (s64)(s32)raw;
-
-	atomic64_counter_set(counter, hwc->prev_count + delta);
-
-	/*
-	 * Adjust the ->prev_count offset - if we went beyond
-	 * irq_period of units, then we got an IRQ and the counter
-	 * was set back to -irq_period:
-	 */
-	while (delta >= (s64)hwc->irq_period) {
-		hwc->prev_count += hwc->irq_period;
-		delta -= (s64)hwc->irq_period;
-	}
-
-	/*
-	 * Calculate the next raw counter value we'll write into
-	 * the counter at the next sched-in time:
-	 */
-	delta -= (s64)hwc->irq_period;
-
-	hwc->next_count = (s32)delta;
+	__hw_perf_counter_set_period(counter, hwc, idx);
+	__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
 {
-	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
 	int cpu, idx;
 
 	if (!nr_hw_counters)
@@ -241,14 +286,14 @@ void perf_counter_print_debug(void)
 		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count);
 
-		next_count = per_cpu(prev_next_count[idx], cpu);
+		prev_left = per_cpu(prev_left[idx], cpu);
 
 		printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n",
 			cpu, idx, pmc_ctrl);
 		printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
-		printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n",
-			cpu, idx, next_count);
+		printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n",
+			cpu, idx, prev_left);
 	}
 	local_irq_enable();
 }
@@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
-	__hw_perf_save_counter(counter, hwc, idx);
-}
 
-static void x86_perf_counter_read(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	unsigned long addr = hwc->counter_base + hwc->idx;
-	s64 offs, val = -1LL;
-	s32 val32;
-
-	/* Careful: NMI might modify the counter offset */
-	do {
-		offs = hwc->prev_count;
-		rdmsrl(addr, val);
-	} while (offs != hwc->prev_count);
-
-	val32 = (s32) val;
-	val = (s64)hwc->irq_period + (s64)val32;
-	atomic64_counter_set(counter, hwc->prev_count + val);
+	/*
+	 * Drain the remaining delta count out of a counter
+	 * that we are disabling:
+	 */
+	x86_perf_counter_update(counter, hwc, idx);
 }
 
 static void perf_store_irq_data(struct perf_counter *counter, u64 data)
@@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data)
 }
 
 /*
- * NMI-safe enable method:
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
  */
 static void perf_save_and_restart(struct perf_counter *counter)
 {
@@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter)
 
 	rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
-	__hw_perf_save_counter(counter, hwc, idx);
-	__hw_perf_counter_set_period(hwc, idx);
+	x86_perf_counter_update(counter, hwc, idx);
+	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__x86_perf_counter_enable(hwc, idx);
+		__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 static void
 perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
 	struct perf_counter *counter, *group_leader = sibling->group_leader;
-	int bit;
-
-	/*
-	 * Store the counter's own timestamp first:
-	 */
-	perf_store_irq_data(sibling, sibling->hw_event.type);
-	perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
 	/*
-	 * Then store sibling timestamps (if any):
+	 * Store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-			/*
-			 * When counter was not in the overflow mask, we have to
-			 * read it from hardware. We read it as well, when it
-			 * has not been read yet and clear the bit in the
-			 * status mask.
-			 */
-			bit = counter->hw.idx;
-			if (!test_bit(bit, (unsigned long *) overflown) ||
-			    test_bit(bit, (unsigned long *) status)) {
-				clear_bit(bit, (unsigned long *) status);
-				perf_save_and_restart(counter);
-			}
-		}
+		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 		perf_store_irq_data(sibling, counter->hw_event.type);
-		perf_store_irq_data(sibling, atomic64_counter_read(counter));
+		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
 
@@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void)
 	perf_counters_initialized = true;
 }
 
+static void x86_perf_counter_read(struct perf_counter *counter)
+{
+	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
 	.hw_perf_counter_enable = x86_perf_counter_enable,
 	.hw_perf_counter_disable = x86_perf_counter_disable,