author		Linus Torvalds <torvalds@linux-foundation.org>	2010-05-27 18:23:47 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-27 18:23:47 -0400
commit		c5617b200ac52e35f7e8cf05a17b0a2d50f6b3e9
tree		40d5e99660c77c5791392d349a93113c044dbf14 /arch
parent		cad719d86e9dbd06634eaba6401e022c8101d6b2
parent		49c177461bfbedeccbab22bf3905db2f9da7f1c3
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (61 commits)
  tracing: Add __used annotation to event variable
  perf, trace: Fix !x86 build bug
  perf report: Support multiple events on the TUI
  perf annotate: Fix up usage of the build id cache
  x86/mmiotrace: Remove redundant instruction prefix checks
  perf annotate: Add TUI interface
  perf tui: Remove annotate from popup menu after failure
  perf report: Don't start the TUI if -D is used
  perf: Fix getline undeclared
  perf: Optimize perf_tp_event_match()
  perf: Remove more code from the fastpath
  perf: Optimize the !vmalloc backed buffer
  perf: Optimize perf_output_copy()
  perf: Fix wakeup storm for RO mmap()s
  perf-record: Share per-cpu buffers
  perf-record: Remove -M
  perf: Ensure that IOC_OUTPUT isn't used to create multi-writer buffers
  perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
  perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction
  perf tui: Allow disabling the TUI on a per command basis in ~/.perfconfig
  ...
Diffstat (limited to 'arch')
-rw-r--r--	arch/sparc/kernel/perf_event.c		| 108
-rw-r--r--	arch/x86/include/asm/perf_event_p4.h	|   3
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	|   6
-rw-r--r--	arch/x86/kernel/cpu/perf_event_p4.c	|  41
-rw-r--r--	arch/x86/mm/pf_in.c			|   2
5 files changed, 90 insertions(+), 70 deletions(-)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 34ce49f80eac..0ec92c8861dd 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -92,6 +92,8 @@ struct cpu_hw_events {
 
 	/* Enabled/disable state.  */
 	int			enabled;
+
+	unsigned int		group_flag;
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 
@@ -981,53 +983,6 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event)
-{
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-	if (is_software_event(event))
-		event->pmu->enable(event);
-}
-
-int hw_perf_group_sched_in(struct perf_event *group_leader,
-			   struct perf_cpu_context *cpuctx,
-			   struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct perf_event *sub;
-	int n0, n;
-
-	if (!sparc_pmu)
-		return 0;
-
-	n0 = cpuc->n_events;
-	n = collect_events(group_leader, perf_max_events - n0,
-			   &cpuc->event[n0], &cpuc->events[n0],
-			   &cpuc->current_idx[n0]);
-	if (n < 0)
-		return -EAGAIN;
-	if (check_excludes(cpuc->event, n0, n))
-		return -EINVAL;
-	if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0))
-		return -EAGAIN;
-	cpuc->n_events = n0 + n;
-	cpuc->n_added += n;
-
-	cpuctx->active_oncpu += n;
-	n = 1;
-	event_sched_in(group_leader);
-	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
-		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub);
-			n++;
-		}
-	}
-	ctx->nr_active += n;
-
-	return 1;
-}
-
 static int sparc_pmu_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1045,11 +1000,20 @@ static int sparc_pmu_enable(struct perf_event *event)
 	cpuc->events[n0] = event->hw.event_base;
 	cpuc->current_idx[n0] = PIC_NO_INDEX;
 
+	/*
+	 * If a group events scheduling transaction was started,
+	 * skip the schedulability test here; it will be performed
+	 * at commit time (->commit_txn) as a whole.
+	 */
+	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+		goto nocheck;
+
 	if (check_excludes(cpuc->event, n0, 1))
 		goto out;
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
 		goto out;
 
+nocheck:
 	cpuc->n_events++;
 	cpuc->n_added++;
 
@@ -1129,11 +1093,61 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+static void sparc_pmu_start_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void sparc_pmu_cancel_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+static int sparc_pmu_commit_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int n;
+
+	if (!sparc_pmu)
+		return -EINVAL;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	n = cpuc->n_events;
+	if (check_excludes(cpuc->event, 0, n))
+		return -EINVAL;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
+		return -EAGAIN;
+
+	return 0;
+}
+
 static const struct pmu pmu = {
 	.enable		= sparc_pmu_enable,
 	.disable	= sparc_pmu_disable,
 	.read		= sparc_pmu_read,
 	.unthrottle	= sparc_pmu_unthrottle,
+	.start_txn	= sparc_pmu_start_txn,
+	.cancel_txn	= sparc_pmu_cancel_txn,
+	.commit_txn	= sparc_pmu_commit_txn,
 };
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
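
For context, these three hooks are driven by the generic group-scheduling path: the core starts a transaction, programs each group member without per-event checks, then asks the PMU to validate the whole group at once and rolls back on failure. A minimal sketch of that calling pattern follows; it is not the literal kernel/perf_event.c code, and event_sched_in_one() is a hypothetical stand-in for the core's per-event scheduling step.

#include <linux/list.h>
#include <linux/perf_event.h>

/* Hypothetical per-event step: program one event, no schedulability check. */
static int event_sched_in_one(struct perf_event *event);

static int group_sched_in_sketch(struct perf_event *leader,
				 const struct pmu *pmu)
{
	struct perf_event *sub;

	if (pmu->start_txn)
		pmu->start_txn(pmu);		/* defer schedulability checks */

	if (event_sched_in_one(leader))
		goto fail;
	list_for_each_entry(sub, &leader->sibling_list, group_entry)
		if (event_sched_in_one(sub))
			goto fail;

	/* validate the whole group in one go */
	if (!pmu->commit_txn || !pmu->commit_txn(pmu))
		return 0;
fail:
	if (pmu->cancel_txn)
		pmu->cancel_txn(pmu);		/* roll the transaction back */
	return -EAGAIN;
}

This is why sparc_pmu_enable() above can skip check_excludes()/sparc_check_constraints() while PERF_EVENT_TXN_STARTED is set: the same tests run once over the full event list in sparc_pmu_commit_txn().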
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index b05400a542ff..64a8ebff06fc 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -89,7 +89,8 @@
 		P4_CCCR_ENABLE)
 
 /* HT mask */
-#define P4_CCCR_MASK_HT	(P4_CCCR_MASK | P4_CCCR_THREAD_ANY)
+#define P4_CCCR_MASK_HT				\
+	(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
 
 #define P4_GEN_ESCR_EMASK(class, name, bit)	\
 	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0db3708..c77586061bcb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	 */
 	regs->bp = rewind_frame_pointer(skip + 1);
 	regs->cs = __KERNEL_CS;
-	local_save_flags(regs->flags);
+	/*
+	 * We abuse bit 3 to pass exact information, see perf_misc_flags
+	 * and the comment with PERF_EFLAGS_EXACT.
+	 */
+	regs->flags = 0;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
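
The comment refers to the PEBS "exact IP" plumbing: the synthesized regs->flags must start out clean so that only the precise-IP fixup can set the marker bit that perf_misc_flags() later tests. A small illustration of that contract, assuming the constant is bit 3 exactly as the comment states (the consumer-side sample-flag name has varied across kernel releases, so only the check itself is shown):

#include <linux/ptrace.h>

/* Bit 3 of the synthesized regs->flags, per the comment above. */
#define PERF_EFLAGS_EXACT	(1UL << 3)

/*
 * Sketch of the consumer side: the PEBS fixup sets the bit when the
 * recovered instruction pointer is precise, and the sampling code tests
 * it before tagging the record as "exact".  Zeroing regs->flags in
 * perf_arch_fetch_caller_regs() keeps the bit from leaking in from a
 * real EFLAGS value.
 */
static inline int regs_ip_is_exact(const struct pt_regs *regs)
{
	return (regs->flags & PERF_EFLAGS_EXACT) != 0;
}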
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 424fc8de68e4..ae85d69644d1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -465,15 +465,21 @@ out:
 	return rc;
 }
 
-static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 {
-	unsigned long dummy;
+	int overflow = 0;
+	u32 low, high;
 
-	rdmsrl(hwc->config_base + hwc->idx, dummy);
-	if (dummy & P4_CCCR_OVF) {
+	rdmsr(hwc->config_base + hwc->idx, low, high);
+
+	/* we need to check high bit for unflagged overflows */
+	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
+		overflow = 1;
 		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			((u64)dummy) & ~P4_CCCR_OVF);
+			((u64)low) & ~P4_CCCR_OVF);
 	}
+
+	return overflow;
 }
 
 static inline void p4_pmu_disable_event(struct perf_event *event)
@@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
-		/*
-		 * FIXME: Redundant call, actually not needed
-		 * but just to check if we're screwed
-		 */
-		p4_pmu_clear_cccr_ovf(hwc);
+		/* it might be unflagged overflow */
+		handled = p4_pmu_clear_cccr_ovf(hwc);
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+		if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
 			continue;
 
-		/*
-		 * event overflow
-		 */
-		handled = 1;
-		data.period = event->hw.last_period;
+		/* event overflow for sure */
+		data.period = event->hw.last_period;
 
 		if (!x86_perf_event_set_period(event))
 			continue;
@@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
 
 /*
  * ESCR address hashing is tricky, ESCRs are not sequential
- * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and
+ * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and
  * the metric between any ESCRs is laid in range [0xa0,0xe1]
  *
  * so we make ~70% filled hashtable
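
The "hashing" described in this comment is really direct indexing by MSR offset: every ESCR address falls in a small window starting at MSR_P4_BSU_ESCR0, so subtracting the base yields a table index, and the table stores the owning address so that stray MSRs can be rejected, as the validation in the next hunk enforces. A rough sketch of the idea, with illustrative macro names rather than the exact perf_event_p4.h definitions:

/*
 * Illustrative sketch of the ESCR -> index mapping described above.
 * The real macros live in arch/x86/include/asm/perf_event_p4.h; these
 * names are stand-ins.
 */
#define ESCR_MSR_BASE		0x3a0	/* MSR_P4_BSU_ESCR0, per the comment */
#define ESCR_MSR_MAX		0x3e1	/* last ESCR offset in the window */
#define ESCR_MSR_TABLE_SIZE	(ESCR_MSR_MAX - ESCR_MSR_BASE + 1)
#define ESCR_MSR_IDX(msr)	((msr) - ESCR_MSR_BASE)

/*
 * Each used slot holds the MSR address itself; a lookup therefore
 * checks bounds, that the slot is populated, and that the slot really
 * belongs to the address being asked about.
 */
static unsigned int escr_table[ESCR_MSR_TABLE_SIZE];

The ~70% fill factor follows from there being roughly 45 distinct ESCRs spread across the 66-entry window.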
@@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr)
 {
 	unsigned int idx = P4_ESCR_MSR_IDX(addr);
 
-	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
-			!p4_escr_table[idx])) {
+	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE	||
+			!p4_escr_table[idx]		||
+			p4_escr_table[idx] != addr)) {
 		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
 		return -1;
 	}
@@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
-	int cpu = raw_smp_processor_id();
+	int cpu = smp_processor_id();
 	struct hw_perf_event *hwc;
 	struct p4_event_bind *bind;
 	unsigned int i, thread, num;
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index df3d5c861cda..308e32570d84 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -34,7 +34,7 @@
 /* IA32 Manual 3, 2-1 */
 static unsigned char prefix_codes[] = {
 	0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
-	0x65, 0x2E, 0x3E, 0x66, 0x67
+	0x65, 0x66, 0x67
 };
 /* IA32 Manual 3, 3-432*/
 static unsigned int reg_rop[] = {
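
The dropped 0x2E and 0x3E bytes were simple duplicates: the mmiotrace decoder only asks whether a byte appears in the table at all, so repeated entries change nothing. A membership scan in the spirit of pf_in.c (the helper name below is illustrative, not the literal function in that file):

/*
 * Sketch: membership test over the prefix_codes[] table above.
 * Relies on ARRAY_SIZE() from <linux/kernel.h>.  Duplicate table
 * entries are harmless here, which is why removing them is safe.
 */
static int is_prefix_byte(unsigned char b)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(prefix_codes); i++)
		if (prefix_codes[i] == b)
			return 1;
	return 0;
}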