author		Linus Torvalds <torvalds@linux-foundation.org>	2011-03-15 21:31:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-15 21:31:30 -0400
commit		a926021cb1f8a99a275eaf6eb546102e9469dc59
tree		c6d0300cd4b1a1fd658708476db4577b68b4de31 /arch/x86/kernel
parent		0586bed3e8563c2eb89bc7256e30ce633ae06cfb
parent		5e814dd597c42daeb8d2a276e64a6ec986ad0e2a
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (184 commits)
perf probe: Clean up probe_point_lazy_walker() return value
tracing: Fix irqoff selftest expanding max buffer
tracing: Align 4 byte ints together in struct tracer
tracing: Export trace_set_clr_event()
tracing: Explain about unstable clock on resume with ring buffer warning
ftrace/graph: Trace function entry before updating index
ftrace: Add .ref.text as one of the safe areas to trace
tracing: Adjust conditional expression latency formatting.
tracing: Fix event alignment: skb:kfree_skb
tracing: Fix event alignment: mce:mce_record
tracing: Fix event alignment: kvm:kvm_hv_hypercall
tracing: Fix event alignment: module:module_request
tracing: Fix event alignment: ftrace:context_switch and ftrace:wakeup
tracing: Remove lock_depth from event entry
perf header: Stop using 'self'
perf session: Use evlist/evsel for managing perf.data attributes
perf top: Don't let events to eat up whole header line
perf top: Fix events overflow in top command
ring-buffer: Remove unused #include <linux/trace_irq.h>
tracing: Add an 'overwrite' trace_option.
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c		170
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c		175
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c		417
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	 97
-rw-r--r--	arch/x86/kernel/cpu/perf_event_p4.c		  8
-rw-r--r--	arch/x86/kernel/cpu/perf_event_p6.c		  4
-rw-r--r--	arch/x86/kernel/cpu/perfctr-watchdog.c		  4
-rw-r--r--	arch/x86/kernel/dumpstack.c			 25
-rw-r--r--	arch/x86/kernel/entry_32.S			  6
-rw-r--r--	arch/x86/kernel/entry_64.S			  6
-rw-r--r--	arch/x86/kernel/ftrace.c			 15
-rw-r--r--	arch/x86/kernel/kgdb.c				  9
-rw-r--r--	arch/x86/kernel/kprobes.c			  8
-rw-r--r--	arch/x86/kernel/vmlinux.lds.S			  1
14 files changed, 814 insertions, 131 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..26604188aa49 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 #include <asm/compat.h>
+#include <asm/smp.h>
 
 #if 0
 #undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES		16
 
 struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
+	 * Intel percore register state.
+	 * Coordinate shared resources between HT threads.
+	 */
+	int				percore_used; /* Used by this CPU? */
+	struct intel_percore		*per_core;
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
@@ -166,8 +176,10 @@ struct cpu_hw_events {
 /*
  * Constraint on the Event code + UMask
  */
-#define PEBS_EVENT_CONSTRAINT(c, n)	\
+#define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	INTEL_UEVENT_CONSTRAINT(c, n)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
@@ -175,6 +187,28 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+	unsigned int		event;
+	unsigned int		msr;
+	u64			config_mask;
+	u64			valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+	.event = (e),		\
+	.msr = (ms),		\
+	.config_mask = (m),	\
+	.valid_mask = (vm),	\
+	}
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
 union perf_capabilities {
 	struct {
 		u64	lbr_format    : 6;
@@ -219,6 +253,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -247,6 +282,11 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
+
+	/*
+	 * Extra registers for events
+	 */
+	struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 /*
  * Propagate event elapsed time into the generic event.
@@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event)
 	 */
 again:
	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(hwc->event_base + idx, new_raw_count);
+	rdmsrl(hwc->event_base, new_raw_count);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
@@ -321,6 +365,49 @@ again:
 	return new_raw_count;
 }
 
+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+		return index << 1;
+	return index;
+}
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+}
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+	struct extra_reg *er;
+
+	event->hw.extra_reg = 0;
+	event->hw.extra_config = 0;
+
+	if (!x86_pmu.extra_regs)
+		return 0;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (config & er->config_mask))
+			continue;
+		if (event->attr.config1 & ~er->valid_mask)
+			return -EINVAL;
+		event->hw.extra_reg = er->msr;
+		event->hw.extra_config = event->attr.config1;
+		break;
+	}
+	return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
@@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void)
 	int i;
 
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
 			goto perfctr_fail;
 	}
 
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
 			goto eventsel_fail;
 	}
 
@@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void)
 
 eventsel_fail:
 	for (i--; i >= 0; i--)
-		release_evntsel_nmi(x86_pmu.eventsel + i);
+		release_evntsel_nmi(x86_pmu_config_addr(i));
 
 	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
-		release_perfctr_nmi(x86_pmu.perfctr + i);
+		release_perfctr_nmi(x86_pmu_event_addr(i));
 
 	return false;
 }
@@ -360,8 +447,8 @@ static void release_pmc_hardware(void)
 	int i;
 
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		release_perfctr_nmi(x86_pmu.perfctr + i);
-		release_evntsel_nmi(x86_pmu.eventsel + i);
+		release_perfctr_nmi(x86_pmu_event_addr(i));
+		release_evntsel_nmi(x86_pmu_config_addr(i));
 	}
 }
 
@@ -382,7 +469,7 @@ static bool check_hw_exists(void)
 	 * complain and bail.
 	 */
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		reg = x86_pmu.eventsel + i;
+		reg = x86_pmu_config_addr(i);
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
 			goto msr_fail;
@@ -407,8 +494,8 @@ static bool check_hw_exists(void)
 	 * that don't trap on the MSR access and always return 0s.
 	 */
 	val = 0xabcdUL;
-	ret = checking_wrmsrl(x86_pmu.perfctr, val);
-	ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+	ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
 	if (ret || val != val_new)
 		goto msr_fail;
 
@@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+	struct perf_event_attr *attr = &event->attr;
 	unsigned int cache_type, cache_op, cache_result;
 	u64 config, val;
 
@@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 		return -EINVAL;
 
 	hwc->config |= val;
-
-	return 0;
+	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+	return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event)
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
-		return set_ext_hw_attr(hwc, attr);
+		return set_ext_hw_attr(hwc, event);
 
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
@@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void)
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		rdmsrl(x86_pmu.eventsel + idx, val);
+		rdmsrl(x86_pmu_config_addr(idx), val);
 		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
 			continue;
 		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-		wrmsrl(x86_pmu.eventsel + idx, val);
+		wrmsrl(x86_pmu_config_addr(idx), val);
 	}
 }
 
@@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu)
 	x86_pmu.disable_all();
 }
 
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+					  u64 enable_mask)
+{
+	if (hwc->extra_reg)
+		wrmsrl(hwc->extra_reg, hwc->extra_config);
+	wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
 static void x86_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		struct perf_event *event = cpuc->events[idx];
-		u64 val;
+		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
-		val = event->hw.config;
-		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-		wrmsrl(x86_pmu.eventsel + idx, val);
+		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 	}
 }
 
@@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		hwc->event_base	= 0;
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		/*
-		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
-		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-		 */
-		hwc->event_base =
-			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
 	} else {
-		hwc->config_base = x86_pmu.eventsel;
-		hwc->event_base  = x86_pmu.perfctr;
+		hwc->config_base = x86_pmu_config_addr(hwc->idx);
+		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
 	}
 }
 
@@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu)
 	x86_pmu.enable_all(added);
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-					  u64 enable_mask)
-{
-	wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+	wrmsrl(hwc->config_base, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	local64_set(&hwc->prev_count, (u64)-left);
 
-	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 
 	/*
 	 * Due to erratum on certan cpu we need
@@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	 * is updated properly
 	 */
 	if (x86_pmu.perfctr_second_write) {
-		wrmsrl(hwc->event_base + idx,
+		wrmsrl(hwc->event_base,
 			(u64)(-left) & x86_pmu.cntval_mask);
 	}
 
@@ -1113,8 +1195,8 @@ void perf_event_print_debug(void)
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
-		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
+		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+		rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 
 		prev_left = per_cpu(pmc_prev_left[idx], cpu);
 
@@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void)
 	pr_info("no hardware sampling interrupt available.\n");
 }
 
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
 {
 	struct event_constraint *c;
 	int err;
@@ -1608,7 +1690,7 @@ out:
 	return ret;
 }
 
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
 {
 	struct pmu *tmp;
 	int err;
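The x86_pmu_extra_regs() helper added above is the generic half of the OFFCORE_RESPONSE plumbing: a raw event whose code matches an extra_regs entry may carry an additional MSR payload in perf_event_attr.config1, which the kernel validates against valid_mask and then writes into the extra MSR from __x86_pmu_enable_event(). A minimal user-space sketch of that flow follows; it assumes a Nehalem/Westmere-class CPU, the 0xf601 mask (DMND_DATA_RD|RESP_MISS, in the bit names defined later in this diff) is illustrative only, and error handling is trimmed.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_RAW;
	attr.config  = 0x01b7;	/* OFFCORE_RESPONSE_0: event 0xb7, umask 1 */
	attr.config1 = 0xf601;	/* response mask; ends up in MSR_OFFCORE_RSP_0 */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... run the workload of interest ... */
	read(fd, &count, sizeof(count));
	printf("offcore responses: %lld\n", count);
	close(fd);
	return 0;
}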
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..461f62bbd774 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
 /*
  * AMD64 events are detected based on their event codes.
  */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 {
 	return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK	0x000000F0ULL
+
+#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS		0x000000C0ULL
+#define AMD_EVENT_DE		0x000000D0ULL
+#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000	FP	PERF_CTL[5:3]
+ * 0x010	FP	PERF_CTL[5:3]
+ * 0x020	LS	PERF_CTL[5:0]
+ * 0x030	LS	PERF_CTL[5:0]
+ * 0x040	DC	PERF_CTL[5:0]
+ * 0x050	DC	PERF_CTL[5:0]
+ * 0x060	CU	PERF_CTL[2:0]
+ * 0x070	CU	PERF_CTL[2:0]
+ * 0x080	IC/DE	PERF_CTL[2:0]
+ * 0x090	IC/DE	PERF_CTL[2:0]
+ * 0x0A0	---
+ * 0x0B0	---
+ * 0x0C0	EX/LS	PERF_CTL[5:0]
+ * 0x0D0	DE	PERF_CTL[2:0]
+ * 0x0E0	NB	NB_PERF_CTL[3:0]
+ * 0x0F0	NB	NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003	FP	PERF_CTL[3]
+ * 0x00B	FP	PERF_CTL[3]
+ * 0x00D	FP	PERF_CTL[3]
+ * 0x023	DE	PERF_CTL[2:0]
+ * 0x02D	LS	PERF_CTL[3]
+ * 0x02E	LS	PERF_CTL[3,0]
+ * 0x043	CU	PERF_CTL[2:0]
+ * 0x045	CU	PERF_CTL[2:0]
+ * 0x046	CU	PERF_CTL[2:0]
+ * 0x054	CU	PERF_CTL[2:0]
+ * 0x055	CU	PERF_CTL[2:0]
+ * 0x08F	IC	PERF_CTL[0]
+ * 0x187	DE	PERF_CTL[0]
+ * 0x188	DE	PERF_CTL[0]
+ * 0x0DB	EX	PERF_CTL[5:0]
+ * 0x0DC	LS	PERF_CTL[5:0]
+ * 0x0DD	LS	PERF_CTL[5:0]
+ * 0x0DE	LS	PERF_CTL[5:0]
+ * 0x0DF	LS	PERF_CTL[5:0]
+ * 0x1D6	EX	PERF_CTL[5:0]
+ * 0x1D8	EX	PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	unsigned int event_code = amd_get_event_code(&event->hw);
+
+	switch (event_code & AMD_EVENT_TYPE_MASK) {
+	case AMD_EVENT_FP:
+		switch (event_code) {
+		case 0x003:
+		case 0x00B:
+		case 0x00D:
+			return &amd_f15_PMC3;
+		default:
+			return &amd_f15_PMC53;
+		}
+	case AMD_EVENT_LS:
+	case AMD_EVENT_DC:
+	case AMD_EVENT_EX_LS:
+		switch (event_code) {
+		case 0x023:
+		case 0x043:
+		case 0x045:
+		case 0x046:
+		case 0x054:
+		case 0x055:
+			return &amd_f15_PMC20;
+		case 0x02D:
+			return &amd_f15_PMC3;
+		case 0x02E:
+			return &amd_f15_PMC30;
+		default:
+			return &amd_f15_PMC50;
+		}
+	case AMD_EVENT_CU:
+	case AMD_EVENT_IC_DE:
+	case AMD_EVENT_DE:
+		switch (event_code) {
+		case 0x08F:
+		case 0x187:
+		case 0x188:
+			return &amd_f15_PMC0;
+		case 0x0DB ... 0x0DF:
+		case 0x1D6:
+		case 0x1D8:
+			return &amd_f15_PMC50;
+		default:
+			return &amd_f15_PMC20;
+		}
+	case AMD_EVENT_NB:
+		/* not yet implemented */
+		return &emptyconstraint;
+	default:
+		return &emptyconstraint;
+	}
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+	.name			= "AMD Family 15h",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.hw_config		= amd_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_F15H_PERF_CTL,
+	.perfctr		= MSR_F15H_PERF_CTR,
+	.event_map		= amd_pmu_event_map,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_counters		= 6,
+	.cntval_bits		= 48,
+	.cntval_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+	.get_event_constraints	= amd_get_event_constraints_f15h,
+	/* nortbridge counters not yet implemented: */
+#if 0
+	.put_event_constraints	= amd_put_event_constraints,
+
+	.cpu_prepare		= amd_pmu_cpu_prepare,
+	.cpu_starting		= amd_pmu_cpu_starting,
+	.cpu_dead		= amd_pmu_cpu_dead,
+#endif
+};
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
-	x86_pmu = amd_pmu;
+	/*
+	 * If core performance counter extensions exists, it must be
+	 * family 15h, otherwise fail. See x86_pmu_addr_offset().
+	 */
+	switch (boot_cpu_data.x86) {
+	case 0x15:
+		if (!cpu_has_perfctr_core)
+			return -ENODEV;
+		x86_pmu = amd_pmu_f15h;
+		break;
+	default:
+		if (cpu_has_perfctr_core)
+			return -ENODEV;
+		x86_pmu = amd_pmu;
+		break;
+	}
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
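A note on the family 15h layout used by amd_pmu_f15h: with the core performance counter extensions, each counter's control and count MSRs form interleaved pairs, which is why x86_pmu_addr_offset() earlier in this merge doubles the index on X86_FEATURE_PERFCTR_CORE parts. The standalone sketch below (plain user-space C, hypothetical values) just demonstrates the two computations involved: the address stride, and the event-code reassembly done by amd_get_event_code(); the raw config 0x1000000d6 is an example that decodes to event 0x1D6 from the exception table above.

#include <stdio.h>

/* mirrors amd_get_event_code(): EventSelect[11:8] live in config[35:32] */
static unsigned int event_code(unsigned long long config)
{
	return ((config >> 24) & 0x0f00) | (config & 0x00ff);
}

/* mirrors x86_pmu_addr_offset(): stride 2 when PERFCTR_CORE is present */
static unsigned int config_addr(unsigned int eventsel_base, int idx,
				int has_perfctr_core)
{
	return eventsel_base + (has_perfctr_core ? idx << 1 : idx);
}

int main(void)
{
	printf("event code: 0x%x\n", event_code(0x1000000d6ULL)); /* 0x1d6 */
	/* base kept symbolic; family 15h uses MSR_F15H_PERF_CTL here */
	printf("ctl offset for counter 2: +%u\n", config_addr(0, 2, 1)); /* +4 */
	return 0;
}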
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..8fc2b2cee1da 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+	int			ref;		/* reference count */
+	unsigned int		extra_reg;	/* extra MSR number */
+	u64			extra_config;	/* extra MSR config */
+};
+
+/*
+ * Per core state
+ * This used to coordinate shared registers for HT threads.
+ */
+struct intel_percore {
+	raw_spinlock_t		lock;		/* protect structure */
+	struct er_account	regs[MAX_EXTRA_REGS];
+	int			refcnt;		/* number of threads */
+	unsigned		core_id;
+};
+
 /*
  * Intel PerfMon, used on Core and later.
  */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_westmere_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_snb_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
+	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	INTEL_EVENT_CONSTRAINT(0xbb, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_gen_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
 	return intel_perfmon_event_map[hw_event];
 }
 
+static __initconst const u64 snb_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
+		[ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	/*
+	 * TBD: Need Off-core Response Performance Monitoring support
+	 */
+	[ C(OP_READ) ] = {
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
+	},
+	[ C(OP_WRITE) ] = {
+		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
+	},
+	[ C(OP_PREFETCH) ] = {
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+		[ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS */
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
 	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses */
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
 	},
 },
 [ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 };
 
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD     (1 << 0)
+#define DMND_RFO         (1 << 1)
+#define DMND_WB          (1 << 3)
+#define PF_DATA_RD       (1 << 4)
+#define PF_DATA_RFO      (1 << 5)
+#define RESP_UNCORE_HIT  (1 << 8)
+#define RESP_MISS        (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+	},
+ }
+};
+
 static __initconst const u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 },
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 },
 [ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
 	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
-		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+		checking_wrmsrl(x86_pmu_event_addr(idx),  0ull);
 	}
 	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
 		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
 }
 
 static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+	struct event_constraint *c;
+	struct intel_percore *pc;
+	struct er_account *era;
+	int i;
+	int free_slot;
+	int found;
+
+	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+		return NULL;
+
+	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+		if (e != c->code)
+			continue;
+
+		/*
+		 * Allocate resource per core.
+		 */
+		pc = cpuc->per_core;
+		if (!pc)
+			break;
+		c = &emptyconstraint;
+		raw_spin_lock(&pc->lock);
+		free_slot = -1;
+		found = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+				/* Allow sharing same config */
+				if (hwc->extra_config == era->extra_config) {
+					era->ref++;
+					cpuc->percore_used = 1;
+					hwc->extra_alloc = 1;
+					c = NULL;
+				}
+				/* else conflict */
+				found = 1;
+				break;
+			} else if (era->ref == 0 && free_slot == -1)
+				free_slot = i;
+		}
+		if (!found && free_slot != -1) {
+			era = &pc->regs[free_slot];
+			era->ref = 1;
+			era->extra_reg = hwc->extra_reg;
+			era->extra_config = hwc->extra_config;
+			cpuc->percore_used = 1;
+			hwc->extra_alloc = 1;
+			c = NULL;
+		}
+		raw_spin_unlock(&pc->lock);
+		return c;
+	}
+
+	return NULL;
+}
+
+static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
+	c = intel_percore_constraints(cpuc, event);
+	if (c)
+		return c;
+
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	struct extra_reg *er;
+	struct intel_percore *pc;
+	struct er_account *era;
+	struct hw_perf_event *hwc = &event->hw;
+	int i, allref;
+
+	if (!cpuc->percore_used)
+		return;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (hwc->config & er->config_mask))
+			continue;
+
+		pc = cpuc->per_core;
+		raw_spin_lock(&pc->lock);
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 &&
+			    era->extra_config == hwc->extra_config &&
+			    era->extra_reg == er->msr) {
+				era->ref--;
+				hwc->extra_alloc = 0;
+				break;
+			}
+		}
+		allref = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++)
+			allref += pc->regs[i].ref;
+		if (allref == 0)
+			cpuc->percore_used = 0;
+		raw_spin_unlock(&pc->lock);
+		break;
+	}
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static int intel_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	if (!cpu_has_ht_siblings())
+		return NOTIFY_OK;
+
+	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+				      GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpuc->per_core)
+		return NOTIFY_BAD;
+
+	raw_spin_lock_init(&cpuc->per_core->lock);
+	cpuc->per_core->core_id = -1;
+	return NOTIFY_OK;
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	int core_id = topology_core_id(cpu);
+	int i;
+
 	init_debug_store_on_cpu(cpu);
 	/*
 	 * Deal with CPUs that don't clear their LBRs on power-up.
 	 */
 	intel_pmu_lbr_reset();
+
+	if (!cpu_has_ht_siblings())
+		return;
+
+	for_each_cpu(i, topology_thread_cpumask(cpu)) {
+		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+		if (pc && pc->core_id == core_id) {
+			kfree(cpuc->per_core);
+			cpuc->per_core = pc;
+			break;
+		}
+	}
+
+	cpuc->per_core->core_id = core_id;
+	cpuc->per_core->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct intel_percore *pc = cpuc->per_core;
+
+	if (pc) {
+		if (pc->core_id == -1 || --pc->refcnt == 0)
+			kfree(pc);
+		cpuc->per_core = NULL;
+	}
+
 	fini_debug_store_on_cpu(cpu);
 }
 
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 
+	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 };
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_core();
 
 		x86_pmu.event_constraints = intel_core2_event_constraints;
+		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
 
@@ -1032,11 +1399,16 @@
 	case 46: /* 45 nm nehalem-ex, "Beckton" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
+		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_atom();
 
 		x86_pmu.event_constraints = intel_gen_event_constraints;
+		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
 		pr_cont("Atom events, ");
 		break;
 
@@ -1054,14 +1427,30 @@
 	case 44: /* 32 nm nehalem, "Gulftown" */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		pr_cont("Westmere events, ");
 		break;
 
+	case 42: /* SandyBridge */
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		intel_pmu_lbr_init_nhm();
+
+		x86_pmu.event_constraints = intel_snb_event_constraints;
+		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+		pr_cont("SandyBridge events, ");
+		break;
+
 	default:
 		/*
 		 * default constraints for v2 and up
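The scheduling rule that intel_percore_constraints() and intel_put_event_constraints() implement above is easy to state: HT siblings share one physical OFFCORE_RSP MSR per core, so two threads may count such an event concurrently only when they request the identical extra-MSR value; otherwise the second event gets emptyconstraint and fails to schedule. Below is a toy single-slot model of that refcounted decision (plain C, no locking; the real code takes pc->lock and manages MAX_EXTRA_REGS slots, and the 0x1a6 register number and masks are illustrative stand-ins).

#include <stdio.h>

struct er_account {
	int ref;			/* reference count */
	unsigned int reg;		/* extra MSR number */
	unsigned long long cfg;		/* extra MSR config */
};

static struct er_account slot;		/* one shared slot per core */

static int try_alloc(unsigned int reg, unsigned long long cfg)
{
	if (slot.ref == 0) {			/* free slot: claim it */
		slot.reg = reg;
		slot.cfg = cfg;
		slot.ref = 1;
		return 1;
	}
	if (slot.reg == reg && slot.cfg == cfg) { /* same config: share */
		slot.ref++;
		return 1;
	}
	return 0;				/* conflict: would not schedule */
}

int main(void)
{
	printf("%d\n", try_alloc(0x1a6, 0xf601)); /* thread 0: 1 (ok) */
	printf("%d\n", try_alloc(0x1a6, 0xf601)); /* sibling, same mask: 1 */
	printf("%d\n", try_alloc(0x1a6, 0x0101)); /* different mask: 0 */
	return 0;
}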
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a0..b95c66ae4a2a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void)
 /*
  * PEBS
  */
-
-static struct event_constraint intel_core_pebs_events[] = {
-	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
+static struct event_constraint intel_core2_pebs_event_constraints[] = {
+	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
 	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
-	PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
-	PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
-	PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
-	PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
-	PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* MEM_LOAD_RETIRED.* */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_atom_pebs_event_constraints[] = {
+	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* MEM_LOAD_RETIRED.* */
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_pebs_events[] = {
-	PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
-	PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
-	PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
-	PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
-	PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
-	PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
-	PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
-	PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
-	PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
+static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),  /* MEM_INST_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),  /* MEM_UNCORE_RETIRED.* */
+	PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),  /* INST_RETIRED.ANY */
+	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),  /* UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),  /* BR_INST_RETIRED.* */
+	PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),  /* SSEX_UOPS_RETIRED.* */
+	PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),  /* MEM_LOAD_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),  /* FP_ASSIST.* */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_pebs_event_constraints[] = {
+	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),  /* MEM_INST_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),  /* MEM_UNCORE_RETIRED.* */
+	PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),  /* INSTR_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),  /* UOPS_RETIRED.* */
+
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),  /* BR_INST_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),  /* BR_MISP_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),  /* SSEX_UOPS_RETIRED.* */
+	PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),  /* MEM_LOAD_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),  /* FP_ASSIST.* */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_pebs_events[] = {
+	PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+	PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
+	PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
+	PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
+	PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
+	PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
+	PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
+	PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
+	PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+	PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+	PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+	PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
+	PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
+	PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
+	PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
+	PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
+	PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
+	PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
+	PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
+	PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
+	PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
+	PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+	PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+	PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
+	PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+	PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+	PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+	PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
+	PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
+	PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
 	EVENT_CONSTRAINT_END
 };
 
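The constraint macros above encode "unit mask << 8 | event select" in their first argument and a bitmask of permitted counters in their second. PEBS_EVENT_CONSTRAINT() matches on event and umask together, while INTEL_EVENT_CONSTRAINT() matches on the event select alone, which is why a single 0xcb entry can replace the five MEM_LOAD_RETIRED umasks the old tables spelled out one by one. A standalone sketch of that matching logic (simplified semantics assumed here, not the kernel's exact macro definitions):

#include <stdint.h>
#include <stdio.h>

/* Simplified model: 'mask' selects which config bits must match 'code',
 * and 'idxmsk' is the bitmask of counters the event may run on. */
struct constraint {
	uint64_t code;		/* event | (umask << 8) */
	uint64_t mask;		/* bits that must match */
	unsigned int idxmsk;	/* allowed counters */
};

#define EVSEL_MASK	0x00ffULL
#define UMASK_MASK	0xff00ULL

static int matches(const struct constraint *c, uint64_t config)
{
	return (config & c->mask) == (c->code & c->mask);
}

int main(void)
{
	/* PEBS_EVENT_CONSTRAINT(0x01c0, 0x2): event 0xc0, umask 0x01,
	 * counter 1 only (INST_RETIRED.PRECDIST on SandyBridge). */
	struct constraint precdist = { 0x01c0, EVSEL_MASK | UMASK_MASK, 0x2 };
	/* INTEL_EVENT_CONSTRAINT(0xcb, 0xf): event 0xcb, any umask. */
	struct constraint memload = { 0x00cb, EVSEL_MASK, 0xf };

	printf("%d %d\n", matches(&precdist, 0x01c0),	/* 1: exact match */
			  matches(&memload, 0x10cb));	/* 1: umask ignored */
	return 0;
}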
@@ -695,20 +753,17 @@ static void intel_ds_init(void)
 			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
-			x86_pmu.pebs_constraints = intel_core_pebs_events;
 			break;
 
 		case 1:
 			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
 			break;
 
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
-			break;
 		}
 	}
 }
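With the per-model tables in place, intel_ds_init() stops deriving constraints from the PEBS record format and only selects the record layout and drain routine; x86_pmu.pebs_constraints is assigned in intel_pmu_init()'s model switch instead (see the SandyBridge case above). A condensed, self-contained sketch of the resulting split, with stand-in types and hypothetical names:

/* Hypothetical condensed sketch, not the kernel's exact code. */
struct pebs_record_core { unsigned long long flags, ip, regs[16]; };
struct pebs_record_nhm  { unsigned long long flags, ip, regs[16],
			  status, dla, dse, lat; };

struct pmu_cfg {
	int pebs;			/* is PEBS usable at all? */
	int pebs_record_size;
	void (*drain_pebs)(void);
};

static void drain_pebs_core(void) { }
static void drain_pebs_nhm(void)  { }

static void ds_init(struct pmu_cfg *pmu, int format)
{
	switch (format) {
	case 0:		/* Core-style PEBS record */
		pmu->pebs_record_size = sizeof(struct pebs_record_core);
		pmu->drain_pebs = drain_pebs_core;
		break;
	case 1:		/* Nehalem-style PEBS record */
		pmu->pebs_record_size = sizeof(struct pebs_record_nhm);
		pmu->drain_pebs = drain_pebs_nhm;
		break;
	default:
		pmu->pebs = 0;	/* unknown format: disable PEBS */
	}
	/* ->pebs_constraints is now installed per CPU model, not here */
}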
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ff751a9f182b..3769ac822f96 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 	u64 v;
 
 	/* an official way for overflow indication */
-	rdmsrl(hwc->config_base + hwc->idx, v);
+	rdmsrl(hwc->config_base, v);
 	if (v & P4_CCCR_OVF) {
-		wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+		wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
 		return 1;
 	}
 
@@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
 	 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
 	 * asserted again and again
 	 */
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+	(void)checking_wrmsrl(hwc->config_base,
 		(u64)(p4_config_unpack_cccr(hwc->config)) &
 			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
 }
@@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
 	p4_pmu_enable_pebs(hwc->config);
 
 	(void)checking_wrmsrl(escr_addr, escr_conf);
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+	(void)checking_wrmsrl(hwc->config_base,
 		(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+	(void)checking_wrmsrl(hwc->config_base, val);
 }
 
 static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+	(void)checking_wrmsrl(hwc->config_base, val);
 }
 
 static __initconst const struct x86_pmu p6_pmu = {
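The p4 and p6 hunks are mechanical fallout of one change: hwc->config_base now stores the final per-counter MSR address, rather than a base that every rdmsr/wrmsr site had to offset by hwc->idx. A self-contained sketch of the idea for P6, whose event-select MSRs are contiguous (helper and struct names hypothetical; the MSR base matches msr-index.h):

#define MSR_P6_EVNTSEL0	0x00000186

struct hw_ev { unsigned int config_base; int idx; };

static unsigned int p6_config_addr(int idx)
{
	return MSR_P6_EVNTSEL0 + idx;	/* EVNTSEL0, EVNTSEL1, ... */
}

static void schedule_event(struct hw_ev *hwc, int idx)
{
	hwc->idx = idx;
	hwc->config_base = p6_config_addr(idx);	/* full address, computed once */
}
/* The hot enable/disable paths then do wrmsrl(hwc->config_base, val)
 * with no "+ hwc->idx" arithmetic, exactly as in the hunks above. */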
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 	/* returns the bit offset of the performance counter register */
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
+		if (msr >= MSR_F15H_PERF_CTR)
+			return (msr - MSR_F15H_PERF_CTR) >> 1;
 		return msr - MSR_K7_PERFCTR0;
 	case X86_VENDOR_INTEL:
 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 	/* returns the bit offset of the event selection register */
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
+		if (msr >= MSR_F15H_PERF_CTL)
+			return (msr - MSR_F15H_PERF_CTL) >> 1;
 		return msr - MSR_K7_EVNTSEL0;
 	case X86_VENDOR_INTEL:
 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
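Family 15h moved the counters away from the legacy K7 MSR range and interleaved control and counter registers (PERF_CTL0, PERF_CTR0, PERF_CTL1, ...), so the counter index is the distance from the base divided by two, while the legacy range remains a plain offset. A worked example (MSR values as in msr-index.h, to the best of my knowledge):

#include <stdio.h>

#define MSR_K7_PERFCTR0		0xc0010004	/* K7: CTR0..CTR3 contiguous */
#define MSR_F15H_PERF_CTR	0xc0010201	/* F15h: CTL/CTR interleaved */

static unsigned int perfctr_msr_to_bit(unsigned int msr)
{
	if (msr >= MSR_F15H_PERF_CTR)
		return (msr - MSR_F15H_PERF_CTR) >> 1;
	return msr - MSR_K7_PERFCTR0;
}

int main(void)
{
	printf("%u\n", perfctr_msr_to_bit(0xc0010205));	/* F15h PERF_CTR2 -> 2 */
	printf("%u\n", perfctr_msr_to_bit(0xc0010006));	/* K7 PERFCTR2    -> 2 */
	return 0;
}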
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1b..220a1c11cfde 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
-	unsigned long flags;
-
-	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
-	/*
-	 * We are in trouble anyway, lets at least try
-	 * to get a message out.
-	 */
-	flags = oops_begin();
-	printk(KERN_EMERG "%s", str);
-	printk(" on CPU%d, ip %08lx, registers:\n",
-		smp_processor_id(), regs->ip);
-	show_registers(regs);
-	oops_end(flags, regs, 0);
-	if (do_panic || panic_on_oops)
-		panic("Non maskable interrupt");
-	nmi_exit();
-	local_irq_enable();
-	do_exit(SIGBUS);
-}
-
 static int __init oops_setup(char *s)
 {
 	if (!s)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7ebb..f5accf8eaa78 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -65,6 +65,8 @@
 #define sysexit_audit	syscall_exit_work
 #endif
 
+.section .entry.text, "ax"
+
 /*
  * We use macros for low-level operations which need to be overridden
  * for paravirtualization. The following will never clobber any registers:
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
  */
 .section .init.rodata,"a"
 ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
 	.p2align 5
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
 ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .endif
       .previous
 	.long 1b
-      .text
+      .section .entry.text, "ax"
 vector=vector+1
       .endif
       .endr
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..0a0ed794edb2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,6 +61,8 @@
 #define __AUDIT_ARCH_LE	0x40000000
 
 	.code64
+	.section .entry.text, "ax"
+
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
  */
 	.section .init.rodata,"a"
 ENTRY(interrupt)
-	.text
+	.section .entry.text
 	.p2align 5
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
       .endif
       .previous
 	.quad 1b
-      .text
+      .section .entry.text
 vector=vector+1
       .endif
       .endr
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 382eb2936d4d..a93742a57468 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-		    frame_pointer) == -EBUSY) {
-		*parent = old;
-		return;
-	}
-
 	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
 
 	/* Only trace if the calling function expects to */
 	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
 		*parent = old;
+		return;
+	}
+
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+		    frame_pointer) == -EBUSY) {
+		*parent = old;
+		return;
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
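The reorder fixes an ordering problem: the filter callback now runs before anything is committed to the task's shadow return stack, so a rejected function no longer needs the fragile current->curr_ret_stack-- undo, and trace.depth is primed with the slot index the push will occupy so the entry and return handlers agree on depth. A standalone toy model of that invariant (all names illustrative, not the kernel's):

#include <stdio.h>

#define STACK_MAX 4

static unsigned long shadow[STACK_MAX];
static int curr_ret_stack = -1;

static int filter_accepts(unsigned long func)
{
	return func != 0xbad;		/* pretend one function is filtered */
}

static int push_return(unsigned long ret)
{
	if (curr_ret_stack + 1 >= STACK_MAX)
		return -1;		/* -EBUSY in the real code */
	shadow[++curr_ret_stack] = ret;
	return 0;
}

static void trace_entry(unsigned long func, unsigned long ret)
{
	int depth = curr_ret_stack + 1;	/* slot the push would occupy */

	if (!filter_accepts(func))
		return;			/* nothing was pushed: nothing to undo */
	if (push_return(ret) < 0)
		return;
	printf("enter %#lx depth %d\n", func, depth);
}

int main(void)
{
	trace_entry(0xbad, 0x1000);	/* filtered out: stack stays untouched */
	trace_entry(0xc0de, 0x2000);	/* traced at depth 0 */
	return 0;
}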
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028a..7c64c420a9f6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 		}
 		return NOTIFY_DONE;
 
-	case DIE_NMIWATCHDOG:
-		if (atomic_read(&kgdb_active) != -1) {
-			/* KGDB CPU roundup: */
-			kgdb_nmicallback(raw_smp_processor_id(), regs);
-			return NOTIFY_STOP;
-		}
-		/* Enter debugger: */
-		break;
-
 	case DIE_DEBUG:
 		if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
 			if (user_mode(regs))
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index d91c477b3f62..c969fd9d1566 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr)
 	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
 		return 0;
 
+	/*
+	 * Do not optimize in the entry code due to the unstable
+	 * stack handling.
+	 */
+	if ((paddr >= (unsigned long )__entry_text_start) &&
+	    (paddr < (unsigned long )__entry_text_end))
+		return 0;
+
 	/* Check there is enough space for a relative jump. */
 	if (size - offset < RELATIVEJUMP_SIZE)
 		return 0;
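can_optimize() compares the probe address against the __entry_text_start/__entry_text_end symbols that the new ENTRY_TEXT linker-script macro exports (see vmlinux.lds.S below); an optimized kprobe plants a relative jump, which is unsafe on the irregular stacks of the entry code. A minimal sketch of that range check (links against the symbols the kernel's linker script provides):

/* Bounds come from the linker script; see ENTRY_TEXT below. */
extern char __entry_text_start[], __entry_text_end[];

static int in_entry_text(unsigned long addr)
{
	return addr >= (unsigned long)__entry_text_start &&
	       addr <  (unsigned long)__entry_text_end;
}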
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf4700755184..6d4341d5c52a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
 		SCHED_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		ENTRY_TEXT
 		IRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
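ENTRY_TEXT gathers everything the entry_32.S/entry_64.S changes above placed in .entry.text into the kernel's text mapping and brackets it with the symbols kprobes consults. From memory, and worth checking against the tree, its definition in include/asm-generic/vmlinux.lds.h is approximately:

#define ENTRY_TEXT							\
		ALIGN_FUNCTION();					\
		VMLINUX_SYMBOL(__entry_text_start) = .;			\
		*(.entry.text)						\
		VMLINUX_SYMBOL(__entry_text_end) = .;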