diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 12:30:52 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 12:30:52 -0400 |
| commit | 4aed2fd8e3181fea7c09ba79cf64e7e3f4413bf9 (patch) | |
| tree | 1f69733e5daab4915a76a41de0e4d1dc61e12cfb /arch/x86/kernel/cpu | |
| parent | 3a3527b6461b1298cc53ce72f336346739297ac8 (diff) | |
| parent | fc9ea5a1e53ee54f681e226d735008e2a6f8f470 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (162 commits)
tracing/kprobes: unregister_trace_probe needs to be called under mutex
perf: expose event__process function
perf events: Fix mmap offset determination
perf, powerpc: fsl_emb: Restore setting perf_sample_data.period
perf, powerpc: Convert the FSL driver to use local64_t
perf tools: Don't keep unreferenced maps when unmaps are detected
perf session: Invalidate last_match when removing threads from rb_tree
perf session: Free the ref_reloc_sym memory at the right place
x86,mmiotrace: Add support for tracing STOS instruction
perf, sched migration: Librarize task states and event headers helpers
perf, sched migration: Librarize the GUI class
perf, sched migration: Make the GUI class client agnostic
perf, sched migration: Make it vertically scrollable
perf, sched migration: Parameterize cpu height and spacing
perf, sched migration: Fix key bindings
perf, sched migration: Ignore unhandled task states
perf, sched migration: Handle ignored migrate out events
perf: New migration tool overview
tracing: Drop cpparg() macro
perf: Use tracepoint_synchronize_unregister() to flush any pending tracepoint call
...
Fix up trivial conflicts in Makefile and drivers/cpufreq/cpufreq.c
Diffstat (limited to 'arch/x86/kernel/cpu')
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 62 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 156 |
2 files changed, 145 insertions, 73 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..f2da20fda02d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -220,6 +220,7 @@ struct x86_pmu { | |||
| 220 | struct perf_event *event); | 220 | struct perf_event *event); |
| 221 | struct event_constraint *event_constraints; | 221 | struct event_constraint *event_constraints; |
| 222 | void (*quirks)(void); | 222 | void (*quirks)(void); |
| 223 | int perfctr_second_write; | ||
| 223 | 224 | ||
| 224 | int (*cpu_prepare)(int cpu); | 225 | int (*cpu_prepare)(int cpu); |
| 225 | void (*cpu_starting)(int cpu); | 226 | void (*cpu_starting)(int cpu); |
| @@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) | |||
| 295 | * count to the generic event atomically: | 296 | * count to the generic event atomically: |
| 296 | */ | 297 | */ |
| 297 | again: | 298 | again: |
| 298 | prev_raw_count = atomic64_read(&hwc->prev_count); | 299 | prev_raw_count = local64_read(&hwc->prev_count); |
| 299 | rdmsrl(hwc->event_base + idx, new_raw_count); | 300 | rdmsrl(hwc->event_base + idx, new_raw_count); |
| 300 | 301 | ||
| 301 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 302 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
| 302 | new_raw_count) != prev_raw_count) | 303 | new_raw_count) != prev_raw_count) |
| 303 | goto again; | 304 | goto again; |
| 304 | 305 | ||
| @@ -313,8 +314,8 @@ again: | |||
| 313 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 314 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
| 314 | delta >>= shift; | 315 | delta >>= shift; |
| 315 | 316 | ||
| 316 | atomic64_add(delta, &event->count); | 317 | local64_add(delta, &event->count); |
| 317 | atomic64_sub(delta, &hwc->period_left); | 318 | local64_sub(delta, &hwc->period_left); |
| 318 | 319 | ||
| 319 | return new_raw_count; | 320 | return new_raw_count; |
| 320 | } | 321 | } |
| @@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
| 438 | if (!hwc->sample_period) { | 439 | if (!hwc->sample_period) { |
| 439 | hwc->sample_period = x86_pmu.max_period; | 440 | hwc->sample_period = x86_pmu.max_period; |
| 440 | hwc->last_period = hwc->sample_period; | 441 | hwc->last_period = hwc->sample_period; |
| 441 | atomic64_set(&hwc->period_left, hwc->sample_period); | 442 | local64_set(&hwc->period_left, hwc->sample_period); |
| 442 | } else { | 443 | } else { |
| 443 | /* | 444 | /* |
| 444 | * If we have a PMU initialized but no APIC | 445 | * If we have a PMU initialized but no APIC |
| @@ -885,7 +886,7 @@ static int | |||
| 885 | x86_perf_event_set_period(struct perf_event *event) | 886 | x86_perf_event_set_period(struct perf_event *event) |
| 886 | { | 887 | { |
| 887 | struct hw_perf_event *hwc = &event->hw; | 888 | struct hw_perf_event *hwc = &event->hw; |
| 888 | s64 left = atomic64_read(&hwc->period_left); | 889 | s64 left = local64_read(&hwc->period_left); |
| 889 | s64 period = hwc->sample_period; | 890 | s64 period = hwc->sample_period; |
| 890 | int ret = 0, idx = hwc->idx; | 891 | int ret = 0, idx = hwc->idx; |
| 891 | 892 | ||
| @@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 897 | */ | 898 | */ |
| 898 | if (unlikely(left <= -period)) { | 899 | if (unlikely(left <= -period)) { |
| 899 | left = period; | 900 | left = period; |
| 900 | atomic64_set(&hwc->period_left, left); | 901 | local64_set(&hwc->period_left, left); |
| 901 | hwc->last_period = period; | 902 | hwc->last_period = period; |
| 902 | ret = 1; | 903 | ret = 1; |
| 903 | } | 904 | } |
| 904 | 905 | ||
| 905 | if (unlikely(left <= 0)) { | 906 | if (unlikely(left <= 0)) { |
| 906 | left += period; | 907 | left += period; |
| 907 | atomic64_set(&hwc->period_left, left); | 908 | local64_set(&hwc->period_left, left); |
| 908 | hwc->last_period = period; | 909 | hwc->last_period = period; |
| 909 | ret = 1; | 910 | ret = 1; |
| 910 | } | 911 | } |
| @@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 923 | * The hw event starts counting from this event offset, | 924 | * The hw event starts counting from this event offset, |
| 924 | * mark it to be able to extract future deltas: | 925 | * mark it to be able to extract future deltas: |
| 925 | */ | 926 | */ |
| 926 | atomic64_set(&hwc->prev_count, (u64)-left); | 927 | local64_set(&hwc->prev_count, (u64)-left); |
| 927 | 928 | ||
| 928 | wrmsrl(hwc->event_base + idx, | 929 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); |
| 930 | |||
| 931 | /* | ||
| 932 | * Due to erratum on certain CPUs we need | ||
| 933 | * a second write to be sure the register | ||
| 934 | * is updated properly | ||
| 935 | */ | ||
| 936 | if (x86_pmu.perfctr_second_write) { | ||
| 937 | wrmsrl(hwc->event_base + idx, | ||
| 929 | (u64)(-left) & x86_pmu.cntval_mask); | 938 | (u64)(-left) & x86_pmu.cntval_mask); |
| 939 | } | ||
| 930 | 940 | ||
| 931 | perf_event_update_userpage(event); | 941 | perf_event_update_userpage(event); |
| 932 | 942 | ||
| @@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 969 | * skip the schedulability test here, it will be performed | 979 | * skip the schedulability test here, it will be performed |
| 970 | * at commit time(->commit_txn) as a whole | 980 | * at commit time(->commit_txn) as a whole |
| 971 | */ | 981 | */ |
| 972 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 982 | if (cpuc->group_flag & PERF_EVENT_TXN) |
| 973 | goto out; | 983 | goto out; |
| 974 | 984 | ||
| 975 | ret = x86_pmu.schedule_events(cpuc, n, assign); | 985 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
| @@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
| 1096 | * The events never got scheduled and ->cancel_txn will truncate | 1106 | * The events never got scheduled and ->cancel_txn will truncate |
| 1097 | * the event_list. | 1107 | * the event_list. |
| 1098 | */ | 1108 | */ |
| 1099 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | 1109 | if (cpuc->group_flag & PERF_EVENT_TXN) |
| 1100 | return; | 1110 | return; |
| 1101 | 1111 | ||
| 1102 | x86_pmu_stop(event); | 1112 | x86_pmu_stop(event); |
| @@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) | |||
| 1388 | { | 1398 | { |
| 1389 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1399 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1390 | 1400 | ||
| 1391 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | 1401 | cpuc->group_flag |= PERF_EVENT_TXN; |
| 1392 | cpuc->n_txn = 0; | 1402 | cpuc->n_txn = 0; |
| 1393 | } | 1403 | } |
| 1394 | 1404 | ||
| @@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
| 1401 | { | 1411 | { |
| 1402 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1412 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1403 | 1413 | ||
| 1404 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | 1414 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
| 1405 | /* | 1415 | /* |
| 1406 | * Truncate the collected events. | 1416 | * Truncate the collected events. |
| 1407 | */ | 1417 | */ |
| @@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) | |||
| 1435 | */ | 1445 | */ |
| 1436 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1446 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
| 1437 | 1447 | ||
| 1438 | /* | 1448 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
| 1439 | * Clear out the txn count so that ->cancel_txn() which gets | ||
| 1440 | * run after ->commit_txn() doesn't undo things. | ||
| 1441 | */ | ||
| 1442 | cpuc->n_txn = 0; | ||
| 1443 | 1449 | ||
| 1444 | return 0; | 1450 | return 0; |
| 1445 | } | 1451 | } |
| @@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = { | |||
| 1607 | .walk_stack = print_context_stack_bp, | 1613 | .walk_stack = print_context_stack_bp, |
| 1608 | }; | 1614 | }; |
| 1609 | 1615 | ||
| 1610 | #include "../dumpstack.h" | ||
| 1611 | |||
| 1612 | static void | 1616 | static void |
| 1613 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1617 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
| 1614 | { | 1618 | { |
| @@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
| 1730 | return entry; | 1734 | return entry; |
| 1731 | } | 1735 | } |
| 1732 | 1736 | ||
| 1733 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) | ||
| 1734 | { | ||
| 1735 | regs->ip = ip; | ||
| 1736 | /* | ||
| 1737 | * perf_arch_fetch_caller_regs adds another call, we need to increment | ||
| 1738 | * the skip level | ||
| 1739 | */ | ||
| 1740 | regs->bp = rewind_frame_pointer(skip + 1); | ||
| 1741 | regs->cs = __KERNEL_CS; | ||
| 1742 | /* | ||
| 1743 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
| 1744 | * and the comment with PERF_EFLAGS_EXACT. | ||
| 1745 | */ | ||
| 1746 | regs->flags = 0; | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1737 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
| 1750 | { | 1738 | { |
| 1751 | unsigned long ip; | 1739 | unsigned long ip; |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d1..107711bf0ee8 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -21,22 +21,36 @@ struct p4_event_bind { | |||
| 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ | 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ |
| 22 | }; | 22 | }; |
| 23 | 23 | ||
| 24 | struct p4_cache_event_bind { | 24 | struct p4_pebs_bind { |
| 25 | unsigned int metric_pebs; | 25 | unsigned int metric_pebs; |
| 26 | unsigned int metric_vert; | 26 | unsigned int metric_vert; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | 29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ |
| 30 | [P4_CACHE__##name] = { \ | 30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ |
| 31 | .metric_pebs = P4_PEBS__##name, \ | 31 | [P4_PEBS_METRIC__##name] = { \ |
| 32 | .metric_vert = P4_VERT__##name, \ | 32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ |
| 33 | .metric_vert = vert, \ | ||
| 33 | } | 34 | } |
| 34 | 35 | ||
| 35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | 36 | /* |
| 36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | 37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here |
| 37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | 38 | * |
| 38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | 39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of |
| 39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | 40 | * event configuration to find out which values are to be |
| 41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
| 42 | * registers | ||
| 43 | */ | ||
| 44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
| 45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
| 46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
| 47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
| 48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
| 49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
| 50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
| 51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
| 52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
| 53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
| 40 | }; | 54 | }; |
| 41 | 55 | ||
| 42 | /* | 56 | /* |
| @@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
| 281 | }, | 295 | }, |
| 282 | }; | 296 | }; |
| 283 | 297 | ||
| 284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | 298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ |
| 285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | 299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ |
| 286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | 300 | P4_ESCR_EMASK_BIT(event, bit)) | \ |
| 287 | p4_config_pack_cccr(cache_event | \ | 301 | p4_config_pack_cccr(metric | \ |
| 288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | 302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) |
| 289 | 303 | ||
| 290 | static __initconst const u64 p4_hw_cache_event_ids | 304 | static __initconst const u64 p4_hw_cache_event_ids |
| @@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
| 296 | [ C(OP_READ) ] = { | 310 | [ C(OP_READ) ] = { |
| 297 | [ C(RESULT_ACCESS) ] = 0x0, | 311 | [ C(RESULT_ACCESS) ] = 0x0, |
| 298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 299 | P4_CACHE__1stl_cache_load_miss_retired), | 313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
| 300 | }, | 314 | }, |
| 301 | }, | 315 | }, |
| 302 | [ C(LL ) ] = { | 316 | [ C(LL ) ] = { |
| 303 | [ C(OP_READ) ] = { | 317 | [ C(OP_READ) ] = { |
| 304 | [ C(RESULT_ACCESS) ] = 0x0, | 318 | [ C(RESULT_ACCESS) ] = 0x0, |
| 305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 306 | P4_CACHE__2ndl_cache_load_miss_retired), | 320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
| 307 | }, | 321 | }, |
| 308 | }, | 322 | }, |
| 309 | [ C(DTLB) ] = { | 323 | [ C(DTLB) ] = { |
| 310 | [ C(OP_READ) ] = { | 324 | [ C(OP_READ) ] = { |
| 311 | [ C(RESULT_ACCESS) ] = 0x0, | 325 | [ C(RESULT_ACCESS) ] = 0x0, |
| 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 313 | P4_CACHE__dtlb_load_miss_retired), | 327 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
| 314 | }, | 328 | }, |
| 315 | [ C(OP_WRITE) ] = { | 329 | [ C(OP_WRITE) ] = { |
| 316 | [ C(RESULT_ACCESS) ] = 0x0, | 330 | [ C(RESULT_ACCESS) ] = 0x0, |
| 317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 318 | P4_CACHE__dtlb_store_miss_retired), | 332 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
| 319 | }, | 333 | }, |
| 320 | }, | 334 | }, |
| 321 | [ C(ITLB) ] = { | 335 | [ C(ITLB) ] = { |
| 322 | [ C(OP_READ) ] = { | 336 | [ C(OP_READ) ] = { |
| 323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | 337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, |
| 324 | P4_CACHE__itlb_reference_hit), | 338 | P4_PEBS_METRIC__none), |
| 325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | 339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
| 326 | P4_CACHE__itlb_reference_miss), | 340 | P4_PEBS_METRIC__none), |
| 327 | }, | 341 | }, |
| 328 | [ C(OP_WRITE) ] = { | 342 | [ C(OP_WRITE) ] = { |
| 329 | [ C(RESULT_ACCESS) ] = -1, | 343 | [ C(RESULT_ACCESS) ] = -1, |
| @@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event) | |||
| 414 | return config; | 428 | return config; |
| 415 | } | 429 | } |
| 416 | 430 | ||
| 431 | static int p4_validate_raw_event(struct perf_event *event) | ||
| 432 | { | ||
| 433 | unsigned int v; | ||
| 434 | |||
| 435 | /* user data may have out-of-bound event index */ | ||
| 436 | v = p4_config_unpack_event(event->attr.config); | ||
| 437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
| 439 | return -EINVAL; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * it may have some screwed PEBS bits | ||
| 444 | */ | ||
| 445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
| 446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
| 447 | return -EINVAL; | ||
| 448 | } | ||
| 449 | v = p4_config_unpack_metric(event->attr.config); | ||
| 450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
| 451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
| 452 | return -EINVAL; | ||
| 453 | } | ||
| 454 | |||
| 455 | return 0; | ||
| 456 | } | ||
| 457 | |||
| 417 | static int p4_hw_config(struct perf_event *event) | 458 | static int p4_hw_config(struct perf_event *event) |
| 418 | { | 459 | { |
| 419 | int cpu = get_cpu(); | 460 | int cpu = get_cpu(); |
| 420 | int rc = 0; | 461 | int rc = 0; |
| 421 | unsigned int evnt; | ||
| 422 | u32 escr, cccr; | 462 | u32 escr, cccr; |
| 423 | 463 | ||
| 424 | /* | 464 | /* |
| @@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event) | |||
| 438 | 478 | ||
| 439 | if (event->attr.type == PERF_TYPE_RAW) { | 479 | if (event->attr.type == PERF_TYPE_RAW) { |
| 440 | 480 | ||
| 441 | /* user data may have out-of-bound event index */ | 481 | rc = p4_validate_raw_event(event); |
| 442 | evnt = p4_config_unpack_event(event->attr.config); | 482 | if (rc) |
| 443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 444 | rc = -EINVAL; | ||
| 445 | goto out; | 483 | goto out; |
| 446 | } | ||
| 447 | 484 | ||
| 448 | /* | 485 | /* |
| 449 | * We don't control raw events so it's up to the caller | 486 | * We don't control raw events so it's up to the caller |
| @@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event) | |||
| 451 | * on HT machine but allow HT-compatible specifics to be | 488 | * on HT machine but allow HT-compatible specifics to be |
| 452 | * passed on) | 489 | * passed on) |
| 453 | * | 490 | * |
| 491 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED | ||
| 492 | * bits since we keep additional info here (for cache events and etc) | ||
| 493 | * | ||
| 454 | * XXX: HT wide things should check perf_paranoid_cpu() && | 494 | * XXX: HT wide things should check perf_paranoid_cpu() && |
| 455 | * CAP_SYS_ADMIN | 495 | * CAP_SYS_ADMIN |
| 456 | */ | 496 | */ |
| 457 | event->hw.config |= event->attr.config & | 497 | event->hw.config |= event->attr.config & |
| 458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | |
| 459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); |
| 460 | } | 500 | } |
| 461 | 501 | ||
| 462 | rc = x86_setup_perfctr(event); | 502 | rc = x86_setup_perfctr(event); |
| @@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |||
| 482 | return overflow; | 522 | return overflow; |
| 483 | } | 523 | } |
| 484 | 524 | ||
| 525 | static void p4_pmu_disable_pebs(void) | ||
| 526 | { | ||
| 527 | /* | ||
| 528 | * FIXME | ||
| 529 | * | ||
| 530 | * It's still allowed that two threads setup same cache | ||
| 531 | * events so we can't simply clear metrics until we know | ||
| 532 | * no one is depending on us, so we need a kind of counter | ||
| 533 | * for "ReplayEvent" users. | ||
| 534 | * | ||
| 535 | * What is more complex -- RAW events, if user (for some | ||
| 536 | * reason) will pass some cache event metric with improper | ||
| 537 | * event opcode -- it's fine from hardware point of view | ||
| 538 | * but completely nonsense from "meaning" of such action. | ||
| 539 | * | ||
| 540 | * So at the moment let's leave metrics turned on forever -- it's | ||
| 541 | * ok for now but needs to be revisited! | ||
| 542 | * | ||
| 543 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
| 544 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
| 545 | */ | ||
| 546 | } | ||
| 547 | |||
| 485 | static inline void p4_pmu_disable_event(struct perf_event *event) | 548 | static inline void p4_pmu_disable_event(struct perf_event *event) |
| 486 | { | 549 | { |
| 487 | struct hw_perf_event *hwc = &event->hw; | 550 | struct hw_perf_event *hwc = &event->hw; |
| @@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void) | |||
| 507 | continue; | 570 | continue; |
| 508 | p4_pmu_disable_event(event); | 571 | p4_pmu_disable_event(event); |
| 509 | } | 572 | } |
| 573 | |||
| 574 | p4_pmu_disable_pebs(); | ||
| 575 | } | ||
| 576 | |||
| 577 | /* configuration must be valid */ | ||
| 578 | static void p4_pmu_enable_pebs(u64 config) | ||
| 579 | { | ||
| 580 | struct p4_pebs_bind *bind; | ||
| 581 | unsigned int idx; | ||
| 582 | |||
| 583 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
| 584 | |||
| 585 | idx = p4_config_unpack_metric(config); | ||
| 586 | if (idx == P4_PEBS_METRIC__none) | ||
| 587 | return; | ||
| 588 | |||
| 589 | bind = &p4_pebs_bind_map[idx]; | ||
| 590 | |||
| 591 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
| 592 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
| 510 | } | 593 | } |
| 511 | 594 | ||
| 512 | static void p4_pmu_enable_event(struct perf_event *event) | 595 | static void p4_pmu_enable_event(struct perf_event *event) |
| @@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
| 515 | int thread = p4_ht_config_thread(hwc->config); | 598 | int thread = p4_ht_config_thread(hwc->config); |
| 516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | 599 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); |
| 517 | unsigned int idx = p4_config_unpack_event(hwc->config); | 600 | unsigned int idx = p4_config_unpack_event(hwc->config); |
| 518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
| 519 | struct p4_event_bind *bind; | 601 | struct p4_event_bind *bind; |
| 520 | struct p4_cache_event_bind *bind_cache; | ||
| 521 | u64 escr_addr, cccr; | 602 | u64 escr_addr, cccr; |
| 522 | 603 | ||
| 523 | bind = &p4_event_bind_map[idx]; | 604 | bind = &p4_event_bind_map[idx]; |
| @@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
| 537 | cccr = p4_config_unpack_cccr(hwc->config); | 618 | cccr = p4_config_unpack_cccr(hwc->config); |
| 538 | 619 | ||
| 539 | /* | 620 | /* |
| 540 | * it could be Cache event so that we need to | 621 | * it could be Cache event so we need to write metrics |
| 541 | * set metrics into additional MSRs | 622 | * into additional MSRs |
| 542 | */ | 623 | */ |
| 543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | 624 | p4_pmu_enable_pebs(hwc->config); |
| 544 | if (idx_cache > P4_CACHE__NONE && | ||
| 545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
| 546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
| 547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
| 548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
| 549 | } | ||
| 550 | 625 | ||
| 551 | (void)checking_wrmsrl(escr_addr, escr_conf); | 626 | (void)checking_wrmsrl(escr_addr, escr_conf); |
| 552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 627 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
| @@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
| 829 | .max_period = (1ULL << 39) - 1, | 904 | .max_period = (1ULL << 39) - 1, |
| 830 | .hw_config = p4_hw_config, | 905 | .hw_config = p4_hw_config, |
| 831 | .schedule_events = p4_pmu_schedule_events, | 906 | .schedule_events = p4_pmu_schedule_events, |
| 907 | /* | ||
| 908 | * This handles erratum N15 in intel doc 249199-029, | ||
| 909 | * the counter may not be updated correctly on write | ||
| 910 | * so we need a second write operation to do the trick | ||
| 911 | * (the official workaround didn't work) | ||
| 912 | * | ||
| 913 | * the former idea is taken from OProfile code | ||
| 914 | */ | ||
| 915 | .perfctr_second_write = 1, | ||
| 832 | }; | 916 | }; |
| 833 | 917 | ||
| 834 | static __init int p4_pmu_init(void) | 918 | static __init int p4_pmu_init(void) |
