Diffstat (limited to 'arch/x86/kernel')

 arch/x86/kernel/apic/apic.c            |    4
 arch/x86/kernel/cpu/Makefile           |   12
 arch/x86/kernel/cpu/common.c           |    2
 arch/x86/kernel/cpu/perf_counter.c     | 1242
 arch/x86/kernel/cpu/perfctr-watchdog.c |    4
 arch/x86/kernel/entry_64.S             |    7
 arch/x86/kernel/irq.c                  |   10
 arch/x86/kernel/irqinit_32.c           |   60
 arch/x86/kernel/irqinit_64.c           |   13
 arch/x86/kernel/signal.c               |    1
 arch/x86/kernel/syscall_table_32.S     |    2
 arch/x86/kernel/traps.c                |   15
 12 files changed, 1329 insertions, 43 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2870920f246..e9021a908020 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -34,6 +34,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 
+#include <asm/perf_counter.h>
 #include <asm/pgalloc.h>
 #include <asm/atomic.h>
 #include <asm/mpspec.h>
@@ -761,6 +762,8 @@ static void local_apic_timer_interrupt(void)
 	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
+
+	perf_counter_unthrottle();
 }
 
 /*
@@ -1133,6 +1136,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
+	perf_counters_lapic_init(0);
 
 	preempt_disable();
 
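The hunk above hooks perf_counter_unthrottle() into the local APIC timer tick; together with the PERFMON_MAX_INTERRUPTS limit of 100000/HZ defined in the new perf_counter.c below, that caps PMU interrupts at roughly 100,000 per second per CPU and lifts the throttle again once per tick. A minimal sketch of the arithmetic (illustrative only, not part of the commit):

/* Per-tick PMU interrupt budget implied by PERFMON_MAX_INTERRUPTS (100000/HZ). */
#include <stdio.h>

int main(void)
{
	const int hz_values[] = { 100, 250, 1000 };
	int i;

	for (i = 0; i < 3; i++)
		printf("HZ=%4d -> at most %d PMU interrupts per tick (~100000/s)\n",
		       hz_values[i], 100000 / hz_values[i]);
	return 0;
}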
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9a06e4..3efcb2b96a15 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
 #
 
 # Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)	+= umc.o
 
-obj-$(CONFIG_X86_MCE)	+= mcheck/
-obj-$(CONFIG_MTRR)	+= mtrr/
-obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE)	+= mcheck/
+obj-$(CONFIG_MTRR)	+= mtrr/
+obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC)	+= perfctr-watchdog.o
 
 quiet_cmd_mkcapflags = MKCAP $@
 	cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c1caefc82e62..591012fb949f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -854,6 +855,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 000000000000..5bfd30ab3920
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1242 @@
1/*
2 * Performance counter x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * For licencing details see kernel-base/COPYING
11 */
12
13#include <linux/perf_counter.h>
14#include <linux/capability.h>
15#include <linux/notifier.h>
16#include <linux/hardirq.h>
17#include <linux/kprobes.h>
18#include <linux/module.h>
19#include <linux/kdebug.h>
20#include <linux/sched.h>
21#include <linux/uaccess.h>
22
23#include <asm/apic.h>
24#include <asm/stacktrace.h>
25#include <asm/nmi.h>
26
27static u64 perf_counter_mask __read_mostly;
28
29struct cpu_hw_counters {
30 struct perf_counter *counters[X86_PMC_IDX_MAX];
31 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
32 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33 unsigned long interrupts;
34 int enabled;
35};
36
37/*
38 * struct x86_pmu - generic x86 pmu
39 */
40struct x86_pmu {
41 const char *name;
42 int version;
43 int (*handle_irq)(struct pt_regs *, int);
44 void (*disable_all)(void);
45 void (*enable_all)(void);
46 void (*enable)(struct hw_perf_counter *, int);
47 void (*disable)(struct hw_perf_counter *, int);
48 unsigned eventsel;
49 unsigned perfctr;
50 u64 (*event_map)(int);
51 u64 (*raw_event)(u64);
52 int max_events;
53 int num_counters;
54 int num_counters_fixed;
55 int counter_bits;
56 u64 counter_mask;
57 u64 max_period;
58 u64 intel_ctrl;
59};
60
61static struct x86_pmu x86_pmu __read_mostly;
62
63static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
64 .enabled = 1,
65};
66
67/*
68 * Intel PerfMon v3. Used on Core2 and later.
69 */
70static const u64 intel_perfmon_event_map[] =
71{
72 [PERF_COUNT_CPU_CYCLES] = 0x003c,
73 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
74 [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
75 [PERF_COUNT_CACHE_MISSES] = 0x412e,
76 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
77 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
78 [PERF_COUNT_BUS_CYCLES] = 0x013c,
79};
80
81static u64 intel_pmu_event_map(int event)
82{
83 return intel_perfmon_event_map[event];
84}
85
86static u64 intel_pmu_raw_event(u64 event)
87{
88#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
89#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
90#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
91
92#define CORE_EVNTSEL_MASK \
93 (CORE_EVNTSEL_EVENT_MASK | \
94 CORE_EVNTSEL_UNIT_MASK | \
95 CORE_EVNTSEL_COUNTER_MASK)
96
97 return event & CORE_EVNTSEL_MASK;
98}
99
100/*
101 * AMD Performance Monitor K7 and later.
102 */
103static const u64 amd_perfmon_event_map[] =
104{
105 [PERF_COUNT_CPU_CYCLES] = 0x0076,
106 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
107 [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
108 [PERF_COUNT_CACHE_MISSES] = 0x0081,
109 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
110 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
111};
112
113static u64 amd_pmu_event_map(int event)
114{
115 return amd_perfmon_event_map[event];
116}
117
118static u64 amd_pmu_raw_event(u64 event)
119{
120#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
121#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
122#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
123
124#define K7_EVNTSEL_MASK \
125 (K7_EVNTSEL_EVENT_MASK | \
126 K7_EVNTSEL_UNIT_MASK | \
127 K7_EVNTSEL_COUNTER_MASK)
128
129 return event & K7_EVNTSEL_MASK;
130}
131
132/*
133 * Propagate counter elapsed time into the generic counter.
134 * Can only be executed on the CPU where the counter is active.
135 * Returns the delta events processed.
136 */
137static u64
138x86_perf_counter_update(struct perf_counter *counter,
139 struct hw_perf_counter *hwc, int idx)
140{
141 int shift = 64 - x86_pmu.counter_bits;
142 u64 prev_raw_count, new_raw_count;
143 s64 delta;
144
145 /*
146 * Careful: an NMI might modify the previous counter value.
147 *
148 * Our tactic to handle this is to first atomically read and
149 * exchange a new raw count - then add that new-prev delta
150 * count to the generic counter atomically:
151 */
152again:
153 prev_raw_count = atomic64_read(&hwc->prev_count);
154 rdmsrl(hwc->counter_base + idx, new_raw_count);
155
156 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
157 new_raw_count) != prev_raw_count)
158 goto again;
159
160 /*
161 * Now we have the new raw value and have updated the prev
162 * timestamp already. We can now calculate the elapsed delta
163 * (counter-)time and add that to the generic counter.
164 *
165 * Careful, not all hw sign-extends above the physical width
166 * of the count.
167 */
168 delta = (new_raw_count << shift) - (prev_raw_count << shift);
169 delta >>= shift;
170
171 atomic64_add(delta, &counter->count);
172 atomic64_sub(delta, &hwc->period_left);
173
174 return new_raw_count;
175}
176
177static atomic_t active_counters;
178static DEFINE_MUTEX(pmc_reserve_mutex);
179
180static bool reserve_pmc_hardware(void)
181{
182 int i;
183
184 if (nmi_watchdog == NMI_LOCAL_APIC)
185 disable_lapic_nmi_watchdog();
186
187 for (i = 0; i < x86_pmu.num_counters; i++) {
188 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
189 goto perfctr_fail;
190 }
191
192 for (i = 0; i < x86_pmu.num_counters; i++) {
193 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
194 goto eventsel_fail;
195 }
196
197 return true;
198
199eventsel_fail:
200 for (i--; i >= 0; i--)
201 release_evntsel_nmi(x86_pmu.eventsel + i);
202
203 i = x86_pmu.num_counters;
204
205perfctr_fail:
206 for (i--; i >= 0; i--)
207 release_perfctr_nmi(x86_pmu.perfctr + i);
208
209 if (nmi_watchdog == NMI_LOCAL_APIC)
210 enable_lapic_nmi_watchdog();
211
212 return false;
213}
214
215static void release_pmc_hardware(void)
216{
217 int i;
218
219 for (i = 0; i < x86_pmu.num_counters; i++) {
220 release_perfctr_nmi(x86_pmu.perfctr + i);
221 release_evntsel_nmi(x86_pmu.eventsel + i);
222 }
223
224 if (nmi_watchdog == NMI_LOCAL_APIC)
225 enable_lapic_nmi_watchdog();
226}
227
228static void hw_perf_counter_destroy(struct perf_counter *counter)
229{
230 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
231 release_pmc_hardware();
232 mutex_unlock(&pmc_reserve_mutex);
233 }
234}
235
236static inline int x86_pmu_initialized(void)
237{
238 return x86_pmu.handle_irq != NULL;
239}
240
241/*
242 * Setup the hardware configuration for a given hw_event_type
243 */
244static int __hw_perf_counter_init(struct perf_counter *counter)
245{
246 struct perf_counter_hw_event *hw_event = &counter->hw_event;
247 struct hw_perf_counter *hwc = &counter->hw;
248 int err;
249
250 if (!x86_pmu_initialized())
251 return -ENODEV;
252
253 err = 0;
254 if (!atomic_inc_not_zero(&active_counters)) {
255 mutex_lock(&pmc_reserve_mutex);
256 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
257 err = -EBUSY;
258 else
259 atomic_inc(&active_counters);
260 mutex_unlock(&pmc_reserve_mutex);
261 }
262 if (err)
263 return err;
264
265 /*
266 * Generate PMC IRQs:
267 * (keep 'enabled' bit clear for now)
268 */
269 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
270
271 /*
272 * Count user and OS events unless requested not to.
273 */
274 if (!hw_event->exclude_user)
275 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
276 if (!hw_event->exclude_kernel)
277 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
278
279 /*
280 * If privileged enough, allow NMI events:
281 */
282 hwc->nmi = 0;
283 if (hw_event->nmi) {
284 if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
285 return -EACCES;
286 hwc->nmi = 1;
287 }
288
289 if (!hwc->irq_period)
290 hwc->irq_period = x86_pmu.max_period;
291
292 atomic64_set(&hwc->period_left,
293 min(x86_pmu.max_period, hwc->irq_period));
294
295 /*
296	 * Raw event types provide the config in the event structure
297 */
298 if (perf_event_raw(hw_event)) {
299 hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
300 } else {
301 if (perf_event_id(hw_event) >= x86_pmu.max_events)
302 return -EINVAL;
303 /*
304 * The generic map:
305 */
306 hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
307 }
308
309 counter->destroy = hw_perf_counter_destroy;
310
311 return 0;
312}
313
314static void intel_pmu_disable_all(void)
315{
316 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
317}
318
319static void amd_pmu_disable_all(void)
320{
321 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
322 int idx;
323
324 if (!cpuc->enabled)
325 return;
326
327 cpuc->enabled = 0;
328 /*
329 * ensure we write the disable before we start disabling the
330 * counters proper, so that amd_pmu_enable_counter() does the
331 * right thing.
332 */
333 barrier();
334
335 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
336 u64 val;
337
338 if (!test_bit(idx, cpuc->active_mask))
339 continue;
340 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
341 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
342 continue;
343 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
344 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
345 }
346}
347
348void hw_perf_disable(void)
349{
350 if (!x86_pmu_initialized())
351 return;
352 return x86_pmu.disable_all();
353}
354
355static void intel_pmu_enable_all(void)
356{
357 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
358}
359
360static void amd_pmu_enable_all(void)
361{
362 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
363 int idx;
364
365 if (cpuc->enabled)
366 return;
367
368 cpuc->enabled = 1;
369 barrier();
370
371 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
372 u64 val;
373
374 if (!test_bit(idx, cpuc->active_mask))
375 continue;
376 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
377 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
378 continue;
379 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
380 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
381 }
382}
383
384void hw_perf_enable(void)
385{
386 if (!x86_pmu_initialized())
387 return;
388 x86_pmu.enable_all();
389}
390
391static inline u64 intel_pmu_get_status(void)
392{
393 u64 status;
394
395 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
396
397 return status;
398}
399
400static inline void intel_pmu_ack_status(u64 ack)
401{
402 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
403}
404
405static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
406{
407 int err;
408 err = checking_wrmsrl(hwc->config_base + idx,
409 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
410}
411
412static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
413{
414 int err;
415 err = checking_wrmsrl(hwc->config_base + idx,
416 hwc->config);
417}
418
419static inline void
420intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
421{
422 int idx = __idx - X86_PMC_IDX_FIXED;
423 u64 ctrl_val, mask;
424 int err;
425
426 mask = 0xfULL << (idx * 4);
427
428 rdmsrl(hwc->config_base, ctrl_val);
429 ctrl_val &= ~mask;
430 err = checking_wrmsrl(hwc->config_base, ctrl_val);
431}
432
433static inline void
434intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
435{
436 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
437 intel_pmu_disable_fixed(hwc, idx);
438 return;
439 }
440
441 x86_pmu_disable_counter(hwc, idx);
442}
443
444static inline void
445amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
446{
447 x86_pmu_disable_counter(hwc, idx);
448}
449
450static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
451
452/*
453 * Set the next IRQ period, based on the hwc->period_left value.
454 * To be called with the counter disabled in hw:
455 */
456static void
457x86_perf_counter_set_period(struct perf_counter *counter,
458 struct hw_perf_counter *hwc, int idx)
459{
460 s64 left = atomic64_read(&hwc->period_left);
461 s64 period = min(x86_pmu.max_period, hwc->irq_period);
462 int err;
463
464 /*
465	 * If we are way outside a reasonable range then just skip forward:
466 */
467 if (unlikely(left <= -period)) {
468 left = period;
469 atomic64_set(&hwc->period_left, left);
470 }
471
472 if (unlikely(left <= 0)) {
473 left += period;
474 atomic64_set(&hwc->period_left, left);
475 }
476 /*
477 * Quirk: certain CPUs dont like it if just 1 event is left:
478 */
479 if (unlikely(left < 2))
480 left = 2;
481
482 per_cpu(prev_left[idx], smp_processor_id()) = left;
483
484 /*
485 * The hw counter starts counting from this counter offset,
486	 * mark it to be able to extract future deltas:
487 */
488 atomic64_set(&hwc->prev_count, (u64)-left);
489
490 err = checking_wrmsrl(hwc->counter_base + idx,
491 (u64)(-left) & x86_pmu.counter_mask);
492}
493
494static inline void
495intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
496{
497 int idx = __idx - X86_PMC_IDX_FIXED;
498 u64 ctrl_val, bits, mask;
499 int err;
500
501 /*
502 * Enable IRQ generation (0x8),
503 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
504 * if requested:
505 */
506 bits = 0x8ULL;
507 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
508 bits |= 0x2;
509 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
510 bits |= 0x1;
511 bits <<= (idx * 4);
512 mask = 0xfULL << (idx * 4);
513
514 rdmsrl(hwc->config_base, ctrl_val);
515 ctrl_val &= ~mask;
516 ctrl_val |= bits;
517 err = checking_wrmsrl(hwc->config_base, ctrl_val);
518}
519
520static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
521{
522 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
523 intel_pmu_enable_fixed(hwc, idx);
524 return;
525 }
526
527 x86_pmu_enable_counter(hwc, idx);
528}
529
530static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
531{
532 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
533
534 if (cpuc->enabled)
535 x86_pmu_enable_counter(hwc, idx);
536 else
537 x86_pmu_disable_counter(hwc, idx);
538}
539
540static int
541fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
542{
543 unsigned int event;
544
545 if (!x86_pmu.num_counters_fixed)
546 return -1;
547
548 if (unlikely(hwc->nmi))
549 return -1;
550
551 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
552
553 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
554 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
555 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
556 return X86_PMC_IDX_FIXED_CPU_CYCLES;
557 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
558 return X86_PMC_IDX_FIXED_BUS_CYCLES;
559
560 return -1;
561}
562
563/*
564 * Find a PMC slot for the freshly enabled / scheduled in counter:
565 */
566static int x86_pmu_enable(struct perf_counter *counter)
567{
568 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
569 struct hw_perf_counter *hwc = &counter->hw;
570 int idx;
571
572 idx = fixed_mode_idx(counter, hwc);
573 if (idx >= 0) {
574 /*
575 * Try to get the fixed counter, if that is already taken
576 * then try to get a generic counter:
577 */
578 if (test_and_set_bit(idx, cpuc->used_mask))
579 goto try_generic;
580
581 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
582 /*
583 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
584 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
585 */
586 hwc->counter_base =
587 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
588 hwc->idx = idx;
589 } else {
590 idx = hwc->idx;
591 /* Try to get the previous generic counter again */
592 if (test_and_set_bit(idx, cpuc->used_mask)) {
593try_generic:
594 idx = find_first_zero_bit(cpuc->used_mask,
595 x86_pmu.num_counters);
596 if (idx == x86_pmu.num_counters)
597 return -EAGAIN;
598
599 set_bit(idx, cpuc->used_mask);
600 hwc->idx = idx;
601 }
602 hwc->config_base = x86_pmu.eventsel;
603 hwc->counter_base = x86_pmu.perfctr;
604 }
605
606 perf_counters_lapic_init(hwc->nmi);
607
608 x86_pmu.disable(hwc, idx);
609
610 cpuc->counters[idx] = counter;
611 set_bit(idx, cpuc->active_mask);
612
613 x86_perf_counter_set_period(counter, hwc, idx);
614 x86_pmu.enable(hwc, idx);
615
616 return 0;
617}
618
619void perf_counter_print_debug(void)
620{
621 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
622 struct cpu_hw_counters *cpuc;
623 unsigned long flags;
624 int cpu, idx;
625
626 if (!x86_pmu.num_counters)
627 return;
628
629 local_irq_save(flags);
630
631 cpu = smp_processor_id();
632 cpuc = &per_cpu(cpu_hw_counters, cpu);
633
634 if (x86_pmu.version >= 2) {
635 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
636 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
637 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
638 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
639
640 pr_info("\n");
641 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
642 pr_info("CPU#%d: status: %016llx\n", cpu, status);
643 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
644 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
645 }
646 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
647
648 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
649 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
650 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
651
652 prev_left = per_cpu(prev_left[idx], cpu);
653
654 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
655 cpu, idx, pmc_ctrl);
656 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
657 cpu, idx, pmc_count);
658 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
659 cpu, idx, prev_left);
660 }
661 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
662 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
663
664 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
665 cpu, idx, pmc_count);
666 }
667 local_irq_restore(flags);
668}
669
670static void x86_pmu_disable(struct perf_counter *counter)
671{
672 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
673 struct hw_perf_counter *hwc = &counter->hw;
674 int idx = hwc->idx;
675
676 /*
677 * Must be done before we disable, otherwise the nmi handler
678 * could reenable again:
679 */
680 clear_bit(idx, cpuc->active_mask);
681 x86_pmu.disable(hwc, idx);
682
683 /*
684 * Make sure the cleared pointer becomes visible before we
685 * (potentially) free the counter:
686 */
687 barrier();
688
689 /*
690 * Drain the remaining delta count out of a counter
691 * that we are disabling:
692 */
693 x86_perf_counter_update(counter, hwc, idx);
694 cpuc->counters[idx] = NULL;
695 clear_bit(idx, cpuc->used_mask);
696}
697
698/*
699 * Save and restart an expired counter. Called by NMI contexts,
700 * so it has to be careful about preempting normal counter ops:
701 */
702static void intel_pmu_save_and_restart(struct perf_counter *counter)
703{
704 struct hw_perf_counter *hwc = &counter->hw;
705 int idx = hwc->idx;
706
707 x86_perf_counter_update(counter, hwc, idx);
708 x86_perf_counter_set_period(counter, hwc, idx);
709
710 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
711 intel_pmu_enable_counter(hwc, idx);
712}
713
714/*
715 * Maximum interrupt frequency of 100KHz per CPU
716 */
717#define PERFMON_MAX_INTERRUPTS (100000/HZ)
718
719/*
720 * This handler is triggered by the local APIC, so the APIC IRQ handling
721 * rules apply:
722 */
723static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
724{
725 struct cpu_hw_counters *cpuc;
726 struct cpu_hw_counters;
727 int bit, cpu, loops;
728 u64 ack, status;
729
730 cpu = smp_processor_id();
731 cpuc = &per_cpu(cpu_hw_counters, cpu);
732
733 perf_disable();
734 status = intel_pmu_get_status();
735 if (!status) {
736 perf_enable();
737 return 0;
738 }
739
740 loops = 0;
741again:
742 if (++loops > 100) {
743 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
744 return 1;
745 }
746
747 inc_irq_stat(apic_perf_irqs);
748 ack = status;
749 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
750 struct perf_counter *counter = cpuc->counters[bit];
751
752 clear_bit(bit, (unsigned long *) &status);
753 if (!test_bit(bit, cpuc->active_mask))
754 continue;
755
756 intel_pmu_save_and_restart(counter);
757 if (perf_counter_overflow(counter, nmi, regs, 0))
758 intel_pmu_disable_counter(&counter->hw, bit);
759 }
760
761 intel_pmu_ack_status(ack);
762
763 /*
764 * Repeat if there is more work to be done:
765 */
766 status = intel_pmu_get_status();
767 if (status)
768 goto again;
769
770 if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
771 perf_enable();
772
773 return 1;
774}
775
776static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
777{
778 int cpu, idx, throttle = 0, handled = 0;
779 struct cpu_hw_counters *cpuc;
780 struct perf_counter *counter;
781 struct hw_perf_counter *hwc;
782 u64 val;
783
784 cpu = smp_processor_id();
785 cpuc = &per_cpu(cpu_hw_counters, cpu);
786
787 if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
788 throttle = 1;
789 __perf_disable();
790 cpuc->enabled = 0;
791 barrier();
792 }
793
794 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
795 int disable = 0;
796
797 if (!test_bit(idx, cpuc->active_mask))
798 continue;
799
800 counter = cpuc->counters[idx];
801 hwc = &counter->hw;
802
803 if (counter->hw_event.nmi != nmi)
804 goto next;
805
806 val = x86_perf_counter_update(counter, hwc, idx);
807 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
808 goto next;
809
810 /* counter overflow */
811 x86_perf_counter_set_period(counter, hwc, idx);
812 handled = 1;
813 inc_irq_stat(apic_perf_irqs);
814 disable = perf_counter_overflow(counter, nmi, regs, 0);
815
816next:
817 if (disable || throttle)
818 amd_pmu_disable_counter(hwc, idx);
819 }
820
821 return handled;
822}
823
824void perf_counter_unthrottle(void)
825{
826 struct cpu_hw_counters *cpuc;
827
828 if (!x86_pmu_initialized())
829 return;
830
831 cpuc = &__get_cpu_var(cpu_hw_counters);
832 if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
833 /*
834 * Clear them before re-enabling irqs/NMIs again:
835 */
836 cpuc->interrupts = 0;
837 perf_enable();
838 } else {
839 cpuc->interrupts = 0;
840 }
841}
842
843void smp_perf_counter_interrupt(struct pt_regs *regs)
844{
845 irq_enter();
846 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
847 ack_APIC_irq();
848 x86_pmu.handle_irq(regs, 0);
849 irq_exit();
850}
851
852void smp_perf_pending_interrupt(struct pt_regs *regs)
853{
854 irq_enter();
855 ack_APIC_irq();
856 inc_irq_stat(apic_pending_irqs);
857 perf_counter_do_pending();
858 irq_exit();
859}
860
861void set_perf_counter_pending(void)
862{
863 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
864}
865
866void perf_counters_lapic_init(int nmi)
867{
868 u32 apic_val;
869
870 if (!x86_pmu_initialized())
871 return;
872
873 /*
874 * Enable the performance counter vector in the APIC LVT:
875 */
876 apic_val = apic_read(APIC_LVTERR);
877
878 apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
879 if (nmi)
880 apic_write(APIC_LVTPC, APIC_DM_NMI);
881 else
882 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
883 apic_write(APIC_LVTERR, apic_val);
884}
885
886static int __kprobes
887perf_counter_nmi_handler(struct notifier_block *self,
888 unsigned long cmd, void *__args)
889{
890 struct die_args *args = __args;
891 struct pt_regs *regs;
892
893 if (!atomic_read(&active_counters))
894 return NOTIFY_DONE;
895
896 switch (cmd) {
897 case DIE_NMI:
898 case DIE_NMI_IPI:
899 break;
900
901 default:
902 return NOTIFY_DONE;
903 }
904
905 regs = args->regs;
906
907 apic_write(APIC_LVTPC, APIC_DM_NMI);
908 /*
909 * Can't rely on the handled return value to say it was our NMI, two
910 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
911 *
912 * If the first NMI handles both, the latter will be empty and daze
913 * the CPU.
914 */
915 x86_pmu.handle_irq(regs, 1);
916
917 return NOTIFY_STOP;
918}
919
920static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
921 .notifier_call = perf_counter_nmi_handler,
922 .next = NULL,
923 .priority = 1
924};
925
926static struct x86_pmu intel_pmu = {
927 .name = "Intel",
928 .handle_irq = intel_pmu_handle_irq,
929 .disable_all = intel_pmu_disable_all,
930 .enable_all = intel_pmu_enable_all,
931 .enable = intel_pmu_enable_counter,
932 .disable = intel_pmu_disable_counter,
933 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
934 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
935 .event_map = intel_pmu_event_map,
936 .raw_event = intel_pmu_raw_event,
937 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
938 /*
939 * Intel PMCs cannot be accessed sanely above 32 bit width,
940 * so we install an artificial 1<<31 period regardless of
941 * the generic counter period:
942 */
943 .max_period = (1ULL << 31) - 1,
944};
945
946static struct x86_pmu amd_pmu = {
947 .name = "AMD",
948 .handle_irq = amd_pmu_handle_irq,
949 .disable_all = amd_pmu_disable_all,
950 .enable_all = amd_pmu_enable_all,
951 .enable = amd_pmu_enable_counter,
952 .disable = amd_pmu_disable_counter,
953 .eventsel = MSR_K7_EVNTSEL0,
954 .perfctr = MSR_K7_PERFCTR0,
955 .event_map = amd_pmu_event_map,
956 .raw_event = amd_pmu_raw_event,
957 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
958 .num_counters = 4,
959 .counter_bits = 48,
960 .counter_mask = (1ULL << 48) - 1,
961 /* use highest bit to detect overflow */
962 .max_period = (1ULL << 47) - 1,
963};
964
965static int intel_pmu_init(void)
966{
967 union cpuid10_edx edx;
968 union cpuid10_eax eax;
969 unsigned int unused;
970 unsigned int ebx;
971 int version;
972
973 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
974 return -ENODEV;
975
976 /*
977 * Check whether the Architectural PerfMon supports
978 * Branch Misses Retired Event or not.
979 */
980 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
981 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
982 return -ENODEV;
983
984 version = eax.split.version_id;
985 if (version < 2)
986 return -ENODEV;
987
988 x86_pmu = intel_pmu;
989 x86_pmu.version = version;
990 x86_pmu.num_counters = eax.split.num_counters;
991
992 /*
993 * Quirk: v2 perfmon does not report fixed-purpose counters, so
994 * assume at least 3 counters:
995 */
996 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
997
998 x86_pmu.counter_bits = eax.split.bit_width;
999 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
1000
1001 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1002
1003 return 0;
1004}
1005
1006static int amd_pmu_init(void)
1007{
1008 x86_pmu = amd_pmu;
1009 return 0;
1010}
1011
1012void __init init_hw_perf_counters(void)
1013{
1014 int err;
1015
1016 switch (boot_cpu_data.x86_vendor) {
1017 case X86_VENDOR_INTEL:
1018 err = intel_pmu_init();
1019 break;
1020 case X86_VENDOR_AMD:
1021 err = amd_pmu_init();
1022 break;
1023 default:
1024 return;
1025 }
1026 if (err != 0)
1027 return;
1028
1029 pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
1030 pr_info("... version: %d\n", x86_pmu.version);
1031 pr_info("... bit width: %d\n", x86_pmu.counter_bits);
1032
1033 pr_info("... num counters: %d\n", x86_pmu.num_counters);
1034 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1035 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1036 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1037 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1038 }
1039 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1040 perf_max_counters = x86_pmu.num_counters;
1041
1042 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
1043 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1044
1045 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1046 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1047 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1048 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1049 }
1050 pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed);
1051
1052 perf_counter_mask |=
1053 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1054
1055 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1056
1057 perf_counters_lapic_init(0);
1058 register_die_notifier(&perf_counter_nmi_notifier);
1059}
1060
1061static inline void x86_pmu_read(struct perf_counter *counter)
1062{
1063 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1064}
1065
1066static const struct pmu pmu = {
1067 .enable = x86_pmu_enable,
1068 .disable = x86_pmu_disable,
1069 .read = x86_pmu_read,
1070};
1071
1072const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1073{
1074 int err;
1075
1076 err = __hw_perf_counter_init(counter);
1077 if (err)
1078 return ERR_PTR(err);
1079
1080 return &pmu;
1081}
1082
1083/*
1084 * callchain support
1085 */
1086
1087static inline
1088void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
1089{
1090 if (entry->nr < MAX_STACK_DEPTH)
1091 entry->ip[entry->nr++] = ip;
1092}
1093
1094static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1095static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1096
1097
1098static void
1099backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1100{
1101 /* Ignore warnings */
1102}
1103
1104static void backtrace_warning(void *data, char *msg)
1105{
1106 /* Ignore warnings */
1107}
1108
1109static int backtrace_stack(void *data, char *name)
1110{
1111 /* Don't bother with IRQ stacks for now */
1112 return -1;
1113}
1114
1115static void backtrace_address(void *data, unsigned long addr, int reliable)
1116{
1117 struct perf_callchain_entry *entry = data;
1118
1119 if (reliable)
1120 callchain_store(entry, addr);
1121}
1122
1123static const struct stacktrace_ops backtrace_ops = {
1124 .warning = backtrace_warning,
1125 .warning_symbol = backtrace_warning_symbol,
1126 .stack = backtrace_stack,
1127 .address = backtrace_address,
1128};
1129
1130static void
1131perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1132{
1133 unsigned long bp;
1134 char *stack;
1135 int nr = entry->nr;
1136
1137 callchain_store(entry, instruction_pointer(regs));
1138
1139 stack = ((char *)regs + sizeof(struct pt_regs));
1140#ifdef CONFIG_FRAME_POINTER
1141 bp = frame_pointer(regs);
1142#else
1143 bp = 0;
1144#endif
1145
1146 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
1147
1148 entry->kernel = entry->nr - nr;
1149}
1150
1151
1152struct stack_frame {
1153 const void __user *next_fp;
1154 unsigned long return_address;
1155};
1156
1157static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1158{
1159 int ret;
1160
1161 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1162 return 0;
1163
1164 ret = 1;
1165 pagefault_disable();
1166 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1167 ret = 0;
1168 pagefault_enable();
1169
1170 return ret;
1171}
1172
1173static void
1174perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1175{
1176 struct stack_frame frame;
1177 const void __user *fp;
1178 int nr = entry->nr;
1179
1180 regs = (struct pt_regs *)current->thread.sp0 - 1;
1181 fp = (void __user *)regs->bp;
1182
1183 callchain_store(entry, regs->ip);
1184
1185 while (entry->nr < MAX_STACK_DEPTH) {
1186 frame.next_fp = NULL;
1187 frame.return_address = 0;
1188
1189 if (!copy_stack_frame(fp, &frame))
1190 break;
1191
1192 if ((unsigned long)fp < user_stack_pointer(regs))
1193 break;
1194
1195 callchain_store(entry, frame.return_address);
1196 fp = frame.next_fp;
1197 }
1198
1199 entry->user = entry->nr - nr;
1200}
1201
1202static void
1203perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1204{
1205 int is_user;
1206
1207 if (!regs)
1208 return;
1209
1210 is_user = user_mode(regs);
1211
1212 if (!current || current->pid == 0)
1213 return;
1214
1215 if (is_user && current->state != TASK_RUNNING)
1216 return;
1217
1218 if (!is_user)
1219 perf_callchain_kernel(regs, entry);
1220
1221 if (current->mm)
1222 perf_callchain_user(regs, entry);
1223}
1224
1225struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1226{
1227 struct perf_callchain_entry *entry;
1228
1229 if (in_nmi())
1230 entry = &__get_cpu_var(nmi_entry);
1231 else
1232 entry = &__get_cpu_var(irq_entry);
1233
1234 entry->nr = 0;
1235 entry->hv = 0;
1236 entry->kernel = 0;
1237 entry->user = 0;
1238
1239 perf_do_callchain(regs, entry);
1240
1241 return entry;
1242}
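x86_perf_counter_update() above folds a possibly-narrow hardware count into the 64-bit generic counter by shifting the raw values up to bit 63 and back down, so wrap-around of a counter that is only counter_bits wide falls out of two's-complement arithmetic. A minimal stand-alone sketch of that idiom (illustrative only, not part of the commit; assumes the usual arithmetic right shift on signed values):

#include <stdio.h>
#include <stdint.h>

/* Same shift trick as x86_perf_counter_update(): deltas of a counter that
 * is only 'bits' wide are computed modulo 2^bits via two's complement. */
static int64_t counter_delta(uint64_t prev, uint64_t now, int bits)
{
	int shift = 64 - bits;
	int64_t delta = (int64_t)((now << shift) - (prev << shift));

	return delta >> shift;	/* arithmetic shift scales the delta back down */
}

int main(void)
{
	/* 48-bit counter wrapping from near its top back to a small value: */
	printf("%lld\n", (long long)counter_delta(0xfffffffffff0ULL, 0x10ULL, 48));
	return 0;		/* prints 32 */
}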
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f6c70a164e32..d6f5b9fbde32 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,8 +19,8 @@
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 
-#include <asm/genapic.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/apic.h>
+#include <asm/perf_counter.h>
 
 struct nmi_watchdog_ctlblk {
 	unsigned int cccr_msr;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6e8433..891004619142 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1025,6 +1025,13 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PERF_VECTOR \
+	perf_counter_interrupt smp_perf_counter_interrupt
+apicinterrupt LOCAL_PENDING_VECTOR \
+	perf_pending_interrupt smp_perf_pending_interrupt
+#endif
+
 /*
  * Exception entry points.
  */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010d74c8..8279fb8df17f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "%*s: ", prec, "CNT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "%*s: ", prec, "PND");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+	seq_printf(p, "  Performance pending work\n");
 #endif
 	if (generic_interrupt_extension) {
 		seq_printf(p, "%*s: ", prec, "PLT");
@@ -166,6 +174,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
+	sum += irq_stats(cpu)->apic_perf_irqs;
+	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 	if (generic_interrupt_extension)
 		sum += irq_stats(cpu)->generic_irqs;
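The new CNT and PND rows make the perf counter and pending-work interrupt counts visible in /proc/interrupts. A small user-space sketch that pulls out just those rows (illustrative only; it assumes the row labels printed by the code above):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/interrupts", "r");

	if (!f) {
		perror("/proc/interrupts");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* "CNT" = performance counter IRQs, "PND" = pending work */
		if (strstr(line, "CNT:") || strstr(line, "PND:"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}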
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 368b0a8836f9..3190a6b961e6 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -118,28 +118,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init smp_intr_init(void)
 {
-	int i;
-
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-	}
-
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -168,6 +148,11 @@ void __init native_init_IRQ(void)
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+}
+
+static void __init apic_intr_init(void)
+{
+	smp_intr_init();
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
@@ -179,12 +164,41 @@ void __init native_init_IRQ(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
+# ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+# ifdef CONFIG_X86_MCE_P4THERMAL
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+# endif
 #endif
+}
+
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+
+	apic_intr_init();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+		int vector = FIRST_EXTERNAL_VECTOR + i;
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (!test_bit(vector, used_vectors))
+			set_intr_gate(vector, interrupt[i]);
+	}
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8cd10537fd46..53ceb26f80ff 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -152,6 +152,12 @@ static void __init apic_intr_init(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+	/* Performance monitoring interrupt: */
+#ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+#endif
 }
 
 void __init native_init_IRQ(void)
@@ -159,6 +165,9 @@ void __init native_init_IRQ(void)
 	int i;
 
 	init_ISA_irqs();
+
+	apic_intr_init();
+
 	/*
 	 * Cover the whole vector space, no vector can escape
 	 * us. (some of these will be overridden and become
@@ -166,12 +175,10 @@ void __init native_init_IRQ(void)
 	 */
 	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (vector != IA32_SYSCALL_VECTOR)
+		if (!test_bit(vector, used_vectors))
 			set_intr_gate(vector, interrupt[i]);
 	}
 
-	apic_intr_init();
-
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 14425166b8e3..0a813b17b172 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
  * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
  * 2000-2002 x86-64 support by Andi Kleen
  */
-
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c8736b491..d51321ddafda 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,5 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
+	.long sys_perf_counter_open
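With sys_rt_tgsigqueueinfo at slot 335, sys_perf_counter_open lands at slot 336 in the 32-bit table. A tiny user-space smoke test (illustrative only; the number 336 is taken from the table above, and the expected EFAULT for a NULL attribute pointer is an assumption about the new syscall's argument checking):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#define NR_perf_counter_open_32	336	/* slot added above */

int main(void)
{
	/* A NULL attribute pointer should fail with EFAULT if the syscall is
	 * wired up, or ENOSYS on a kernel without this patch. */
	long ret = syscall(NR_perf_counter_open_32, NULL, 0, -1, -1, 0UL);

	printf("ret=%ld errno=%d\n", ret, errno);
	return 0;
}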
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d288327ff0..2cc162e09c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -945,8 +945,13 @@ void __init trap_init(void)
 #endif
 	set_intr_gate(19, &simd_coprocessor_error);
 
+	/* Reserve all the builtin and the syscall vector: */
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, used_vectors);
+
 #ifdef CONFIG_IA32_EMULATION
 	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -963,17 +968,9 @@ void __init trap_init(void)
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
-#endif
-
-	/* Reserve all the builtin and the syscall vector: */
-	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
-		set_bit(i, used_vectors);
-
-#ifdef CONFIG_X86_64
-	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
+
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */