diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 18:23:47 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 18:23:47 -0400 |
commit | c5617b200ac52e35f7e8cf05a17b0a2d50f6b3e9 (patch) | |
tree | 40d5e99660c77c5791392d349a93113c044dbf14 /arch | |
parent | cad719d86e9dbd06634eaba6401e022c8101d6b2 (diff) | |
parent | 49c177461bfbedeccbab22bf3905db2f9da7f1c3 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (61 commits)
tracing: Add __used annotation to event variable
perf, trace: Fix !x86 build bug
perf report: Support multiple events on the TUI
perf annotate: Fix up usage of the build id cache
x86/mmiotrace: Remove redundant instruction prefix checks
perf annotate: Add TUI interface
perf tui: Remove annotate from popup menu after failure
perf report: Don't start the TUI if -D is used
perf: Fix getline undeclared
perf: Optimize perf_tp_event_match()
perf: Remove more code from the fastpath
perf: Optimize the !vmalloc backed buffer
perf: Optimize perf_output_copy()
perf: Fix wakeup storm for RO mmap()s
perf-record: Share per-cpu buffers
perf-record: Remove -M
perf: Ensure that IOC_OUTPUT isn't used to create multi-writer buffers
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction
perf tui: Allow disabling the TUI on a per command basis in ~/.perfconfig
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sparc/kernel/perf_event.c | 108 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event_p4.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 41 | ||||
-rw-r--r-- | arch/x86/mm/pf_in.c | 2 |
5 files changed, 90 insertions, 70 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 34ce49f80eac..0ec92c8861dd 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -92,6 +92,8 @@ struct cpu_hw_events { | |||
92 | 92 | ||
93 | /* Enabled/disable state. */ | 93 | /* Enabled/disable state. */ |
94 | int enabled; | 94 | int enabled; |
95 | |||
96 | unsigned int group_flag; | ||
95 | }; | 97 | }; |
96 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; | 98 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; |
97 | 99 | ||
@@ -981,53 +983,6 @@ static int collect_events(struct perf_event *group, int max_count, | |||
981 | return n; | 983 | return n; |
982 | } | 984 | } |
983 | 985 | ||
984 | static void event_sched_in(struct perf_event *event) | ||
985 | { | ||
986 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
987 | event->oncpu = smp_processor_id(); | ||
988 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
989 | if (is_software_event(event)) | ||
990 | event->pmu->enable(event); | ||
991 | } | ||
992 | |||
993 | int hw_perf_group_sched_in(struct perf_event *group_leader, | ||
994 | struct perf_cpu_context *cpuctx, | ||
995 | struct perf_event_context *ctx) | ||
996 | { | ||
997 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
998 | struct perf_event *sub; | ||
999 | int n0, n; | ||
1000 | |||
1001 | if (!sparc_pmu) | ||
1002 | return 0; | ||
1003 | |||
1004 | n0 = cpuc->n_events; | ||
1005 | n = collect_events(group_leader, perf_max_events - n0, | ||
1006 | &cpuc->event[n0], &cpuc->events[n0], | ||
1007 | &cpuc->current_idx[n0]); | ||
1008 | if (n < 0) | ||
1009 | return -EAGAIN; | ||
1010 | if (check_excludes(cpuc->event, n0, n)) | ||
1011 | return -EINVAL; | ||
1012 | if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0)) | ||
1013 | return -EAGAIN; | ||
1014 | cpuc->n_events = n0 + n; | ||
1015 | cpuc->n_added += n; | ||
1016 | |||
1017 | cpuctx->active_oncpu += n; | ||
1018 | n = 1; | ||
1019 | event_sched_in(group_leader); | ||
1020 | list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { | ||
1021 | if (sub->state != PERF_EVENT_STATE_OFF) { | ||
1022 | event_sched_in(sub); | ||
1023 | n++; | ||
1024 | } | ||
1025 | } | ||
1026 | ctx->nr_active += n; | ||
1027 | |||
1028 | return 1; | ||
1029 | } | ||
1030 | |||
1031 | static int sparc_pmu_enable(struct perf_event *event) | 986 | static int sparc_pmu_enable(struct perf_event *event) |
1032 | { | 987 | { |
1033 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 988 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1045,11 +1000,20 @@ static int sparc_pmu_enable(struct perf_event *event) | |||
1045 | cpuc->events[n0] = event->hw.event_base; | 1000 | cpuc->events[n0] = event->hw.event_base; |
1046 | cpuc->current_idx[n0] = PIC_NO_INDEX; | 1001 | cpuc->current_idx[n0] = PIC_NO_INDEX; |
1047 | 1002 | ||
1003 | /* | ||
1004 | * If group events scheduling transaction was started, | ||
1005 | * skip the schedulability test here, it will be performed | ||
1006 | * at commit time(->commit_txn) as a whole | ||
1007 | */ | ||
1008 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
1009 | goto nocheck; | ||
1010 | |||
1048 | if (check_excludes(cpuc->event, n0, 1)) | 1011 | if (check_excludes(cpuc->event, n0, 1)) |
1049 | goto out; | 1012 | goto out; |
1050 | if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1)) | 1013 | if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1)) |
1051 | goto out; | 1014 | goto out; |
1052 | 1015 | ||
1016 | nocheck: | ||
1053 | cpuc->n_events++; | 1017 | cpuc->n_events++; |
1054 | cpuc->n_added++; | 1018 | cpuc->n_added++; |
1055 | 1019 | ||
@@ -1129,11 +1093,61 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1129 | return 0; | 1093 | return 0; |
1130 | } | 1094 | } |
1131 | 1095 | ||
1096 | /* | ||
1097 | * Start group events scheduling transaction | ||
1098 | * Set the flag to make pmu::enable() not perform the | ||
1099 | * schedulability test, it will be performed at commit time | ||
1100 | */ | ||
1101 | static void sparc_pmu_start_txn(const struct pmu *pmu) | ||
1102 | { | ||
1103 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1104 | |||
1105 | cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | * Stop group events scheduling transaction | ||
1110 | * Clear the flag and pmu::enable() will perform the | ||
1111 | * schedulability test. | ||
1112 | */ | ||
1113 | static void sparc_pmu_cancel_txn(const struct pmu *pmu) | ||
1114 | { | ||
1115 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1116 | |||
1117 | cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
1118 | } | ||
1119 | |||
1120 | /* | ||
1121 | * Commit group events scheduling transaction | ||
1122 | * Perform the group schedulability test as a whole | ||
1123 | * Return 0 if success | ||
1124 | */ | ||
1125 | static int sparc_pmu_commit_txn(const struct pmu *pmu) | ||
1126 | { | ||
1127 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1128 | int n; | ||
1129 | |||
1130 | if (!sparc_pmu) | ||
1131 | return -EINVAL; | ||
1132 | |||
1133 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1134 | n = cpuc->n_events; | ||
1135 | if (check_excludes(cpuc->event, 0, n)) | ||
1136 | return -EINVAL; | ||
1137 | if (sparc_check_constraints(cpuc->event, cpuc->events, n)) | ||
1138 | return -EAGAIN; | ||
1139 | |||
1140 | return 0; | ||
1141 | } | ||
1142 | |||
1132 | static const struct pmu pmu = { | 1143 | static const struct pmu pmu = { |
1133 | .enable = sparc_pmu_enable, | 1144 | .enable = sparc_pmu_enable, |
1134 | .disable = sparc_pmu_disable, | 1145 | .disable = sparc_pmu_disable, |
1135 | .read = sparc_pmu_read, | 1146 | .read = sparc_pmu_read, |
1136 | .unthrottle = sparc_pmu_unthrottle, | 1147 | .unthrottle = sparc_pmu_unthrottle, |
1148 | .start_txn = sparc_pmu_start_txn, | ||
1149 | .cancel_txn = sparc_pmu_cancel_txn, | ||
1150 | .commit_txn = sparc_pmu_commit_txn, | ||
1137 | }; | 1151 | }; |
1138 | 1152 | ||
1139 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1153 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b05400a542ff..64a8ebff06fc 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -89,7 +89,8 @@ | |||
89 | P4_CCCR_ENABLE) | 89 | P4_CCCR_ENABLE) |
90 | 90 | ||
91 | /* HT mask */ | 91 | /* HT mask */ |
92 | #define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) | 92 | #define P4_CCCR_MASK_HT \ |
93 | (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) | ||
93 | 94 | ||
94 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | 95 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ |
95 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | 96 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fd4db0db3708..c77586061bcb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
1717 | */ | 1717 | */ |
1718 | regs->bp = rewind_frame_pointer(skip + 1); | 1718 | regs->bp = rewind_frame_pointer(skip + 1); |
1719 | regs->cs = __KERNEL_CS; | 1719 | regs->cs = __KERNEL_CS; |
1720 | local_save_flags(regs->flags); | 1720 | /* |
1721 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
1722 | * and the comment with PERF_EFLAGS_EXACT. | ||
1723 | */ | ||
1724 | regs->flags = 0; | ||
1721 | } | 1725 | } |
1722 | 1726 | ||
1723 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1727 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 424fc8de68e4..ae85d69644d1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -465,15 +465,21 @@ out: | |||
465 | return rc; | 465 | return rc; |
466 | } | 466 | } |
467 | 467 | ||
468 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | 468 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) |
469 | { | 469 | { |
470 | unsigned long dummy; | 470 | int overflow = 0; |
471 | u32 low, high; | ||
471 | 472 | ||
472 | rdmsrl(hwc->config_base + hwc->idx, dummy); | 473 | rdmsr(hwc->config_base + hwc->idx, low, high); |
473 | if (dummy & P4_CCCR_OVF) { | 474 | |
475 | /* we need to check high bit for unflagged overflows */ | ||
476 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
477 | overflow = 1; | ||
474 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 478 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
475 | ((u64)dummy) & ~P4_CCCR_OVF); | 479 | ((u64)low) & ~P4_CCCR_OVF); |
476 | } | 480 | } |
481 | |||
482 | return overflow; | ||
477 | } | 483 | } |
478 | 484 | ||
479 | static inline void p4_pmu_disable_event(struct perf_event *event) | 485 | static inline void p4_pmu_disable_event(struct perf_event *event) |
@@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
584 | 590 | ||
585 | WARN_ON_ONCE(hwc->idx != idx); | 591 | WARN_ON_ONCE(hwc->idx != idx); |
586 | 592 | ||
587 | /* | 593 | /* it might be unflagged overflow */ |
588 | * FIXME: Redundant call, actually not needed | 594 | handled = p4_pmu_clear_cccr_ovf(hwc); |
589 | * but just to check if we're screwed | ||
590 | */ | ||
591 | p4_pmu_clear_cccr_ovf(hwc); | ||
592 | 595 | ||
593 | val = x86_perf_event_update(event); | 596 | val = x86_perf_event_update(event); |
594 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | 597 | if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) |
595 | continue; | 598 | continue; |
596 | 599 | ||
597 | /* | 600 | /* event overflow for sure */ |
598 | * event overflow | 601 | data.period = event->hw.last_period; |
599 | */ | ||
600 | handled = 1; | ||
601 | data.period = event->hw.last_period; | ||
602 | 602 | ||
603 | if (!x86_perf_event_set_period(event)) | 603 | if (!x86_perf_event_set_period(event)) |
604 | continue; | 604 | continue; |
@@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | |||
670 | 670 | ||
671 | /* | 671 | /* |
672 | * ESCR address hashing is tricky, ESCRs are not sequential | 672 | * ESCR address hashing is tricky, ESCRs are not sequential |
673 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and | 673 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and |
674 | * the metric between any ESCRs is laid in range [0xa0,0xe1] | 674 | * the metric between any ESCRs is laid in range [0xa0,0xe1] |
675 | * | 675 | * |
676 | * so we make ~70% filled hashtable | 676 | * so we make ~70% filled hashtable |
@@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr) | |||
735 | { | 735 | { |
736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | 736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); |
737 | 737 | ||
738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | 738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || |
739 | !p4_escr_table[idx])) { | 739 | !p4_escr_table[idx] || |
740 | p4_escr_table[idx] != addr)) { | ||
740 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | 741 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); |
741 | return -1; | 742 | return -1; |
742 | } | 743 | } |
@@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
762 | { | 763 | { |
763 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 764 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
764 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | 765 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; |
765 | int cpu = raw_smp_processor_id(); | 766 | int cpu = smp_processor_id(); |
766 | struct hw_perf_event *hwc; | 767 | struct hw_perf_event *hwc; |
767 | struct p4_event_bind *bind; | 768 | struct p4_event_bind *bind; |
768 | unsigned int i, thread, num; | 769 | unsigned int i, thread, num; |
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index df3d5c861cda..308e32570d84 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c | |||
@@ -34,7 +34,7 @@ | |||
34 | /* IA32 Manual 3, 2-1 */ | 34 | /* IA32 Manual 3, 2-1 */ |
35 | static unsigned char prefix_codes[] = { | 35 | static unsigned char prefix_codes[] = { |
36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, | 36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, |
37 | 0x65, 0x2E, 0x3E, 0x66, 0x67 | 37 | 0x65, 0x66, 0x67 |
38 | }; | 38 | }; |
39 | /* IA32 Manual 3, 3-432*/ | 39 | /* IA32 Manual 3, 3-432*/ |
40 | static unsigned int reg_rop[] = { | 40 | static unsigned int reg_rop[] = { |