Diffstat (limited to 'arch')

 arch/sparc/kernel/perf_event.c       | 108
 arch/x86/include/asm/perf_event_p4.h |   3
 arch/x86/kernel/cpu/perf_event.c     |   6
 arch/x86/kernel/cpu/perf_event_p4.c  |  41
 arch/x86/mm/pf_in.c                  |   2

 5 files changed, 90 insertions(+), 70 deletions(-)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 34ce49f80eac..0ec92c8861dd 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -92,6 +92,8 @@ struct cpu_hw_events {
 
 	/* Enabled/disable state. */
 	int enabled;
+
+	unsigned int group_flag;
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 
@@ -981,53 +983,6 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event)
-{
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-	if (is_software_event(event))
-		event->pmu->enable(event);
-}
-
-int hw_perf_group_sched_in(struct perf_event *group_leader,
-	struct perf_cpu_context *cpuctx,
-	struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct perf_event *sub;
-	int n0, n;
-
-	if (!sparc_pmu)
-		return 0;
-
-	n0 = cpuc->n_events;
-	n = collect_events(group_leader, perf_max_events - n0,
-		&cpuc->event[n0], &cpuc->events[n0],
-		&cpuc->current_idx[n0]);
-	if (n < 0)
-		return -EAGAIN;
-	if (check_excludes(cpuc->event, n0, n))
-		return -EINVAL;
-	if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0))
-		return -EAGAIN;
-	cpuc->n_events = n0 + n;
-	cpuc->n_added += n;
-
-	cpuctx->active_oncpu += n;
-	n = 1;
-	event_sched_in(group_leader);
-	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
-		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub);
-			n++;
-		}
-	}
-	ctx->nr_active += n;
-
-	return 1;
-}
-
 static int sparc_pmu_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1045,11 +1000,20 @@ static int sparc_pmu_enable(struct perf_event *event)
 	cpuc->events[n0] = event->hw.event_base;
 	cpuc->current_idx[n0] = PIC_NO_INDEX;
 
+	/*
+	 * If a group events scheduling transaction was started,
+	 * skip the schedulability test here; it will be performed
+	 * at commit time (->commit_txn) as a whole.
+	 */
+	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+		goto nocheck;
+
 	if (check_excludes(cpuc->event, n0, 1))
 		goto out;
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
 		goto out;
 
+nocheck:
 	cpuc->n_events++;
 	cpuc->n_added++;
 
@@ -1129,11 +1093,61 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+static void sparc_pmu_start_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void sparc_pmu_cancel_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+static int sparc_pmu_commit_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int n;
+
+	if (!sparc_pmu)
+		return -EINVAL;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	n = cpuc->n_events;
+	if (check_excludes(cpuc->event, 0, n))
+		return -EINVAL;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
+		return -EAGAIN;
+
+	return 0;
+}
+
 static const struct pmu pmu = {
 	.enable = sparc_pmu_enable,
 	.disable = sparc_pmu_disable,
 	.read = sparc_pmu_read,
 	.unthrottle = sparc_pmu_unthrottle,
+	.start_txn = sparc_pmu_start_txn,
+	.cancel_txn = sparc_pmu_cancel_txn,
+	.commit_txn = sparc_pmu_commit_txn,
 };
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
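
For context: the three callbacks added above replace the architecture-specific hw_perf_group_sched_in() removed in the earlier hunk. Instead of each architecture open-coding group scheduling, the generic perf core is expected to open a transaction, call pmu->enable() for every group member (which, with this patch, defers the per-event schedulability test), then validate the whole group once via commit_txn(), falling back to cancel_txn() on failure. A minimal sketch of that caller-side pattern follows; it is not part of the patch, error handling is simplified, and group_sched_in_sketch() is a made-up name, though start_txn/commit_txn/cancel_txn, pmu->enable() and sibling_list are the interfaces the patch itself touches.

/* Sketch only (not from the patch): how the core might drive the
 * transaction interface when scheduling a whole event group. */
static int group_sched_in_sketch(const struct pmu *pmu,
				 struct perf_event *leader)
{
	struct perf_event *sub;

	pmu->start_txn(pmu);		/* defer schedulability checks */

	if (pmu->enable(leader))
		goto fail;
	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
		if (pmu->enable(sub))
			goto fail;
	}

	if (!pmu->commit_txn(pmu))	/* one check for the whole group */
		return 0;
fail:
	pmu->cancel_txn(pmu);		/* drop the pending transaction */
	return -EAGAIN;
}

On failure the sketch leaves the already-enabled siblings for the caller to undo, which the real scheduler handles explicitly; the point here is only the start/commit/cancel bracketing.
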
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index b05400a542ff..64a8ebff06fc 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -89,7 +89,8 @@
 		P4_CCCR_ENABLE)
 
 /* HT mask */
-#define P4_CCCR_MASK_HT	(P4_CCCR_MASK | P4_CCCR_THREAD_ANY)
+#define P4_CCCR_MASK_HT	\
+	(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
 
 #define P4_GEN_ESCR_EMASK(class, name, bit)	\
 	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0db3708..c77586061bcb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1717,7 +1717,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	 */
 	regs->bp = rewind_frame_pointer(skip + 1);
 	regs->cs = __KERNEL_CS;
-	local_save_flags(regs->flags);
+	/*
+	 * We abuse bit 3 to pass exact information, see perf_misc_flags
+	 * and the comment with PERF_EFLAGS_EXACT.
+	 */
+	regs->flags = 0;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
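
The replaced local_save_flags() call matters because the saved flags are later translated into the sample's misc bits: eflags bit 3 is architecturally always zero, so perf can borrow it (PERF_EFLAGS_EXACT, referenced in the new comment) to mark samples whose instruction pointer has been precisely fixed up, e.g. by PEBS. Clearing regs->flags here keeps fabricated caller regs from accidentally carrying that bit. A rough sketch of the consumer side follows; it is not from the patch, and the PERF_RECORD_MISC_* constant names are assumptions based on this era of the code.

/* Sketch only: approximate shape of the misc-flags translation on x86.
 * Constant names are assumed, not copied from the tree. */
static unsigned long perf_misc_flags_sketch(struct pt_regs *regs)
{
	unsigned long misc;

	misc = user_mode(regs) ? PERF_RECORD_MISC_USER
			       : PERF_RECORD_MISC_KERNEL;

	/* bit 3 of eflags is never set by hardware, so it can carry
	 * the "exact IP" hint set by the PEBS fixup code */
	if (regs->flags & PERF_EFLAGS_EXACT)
		misc |= PERF_RECORD_MISC_EXACT_IP;

	return misc;
}
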
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 424fc8de68e4..ae85d69644d1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -465,15 +465,21 @@ out:
 	return rc;
 }
 
-static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 {
-	unsigned long dummy;
+	int overflow = 0;
+	u32 low, high;
 
-	rdmsrl(hwc->config_base + hwc->idx, dummy);
-	if (dummy & P4_CCCR_OVF) {
+	rdmsr(hwc->config_base + hwc->idx, low, high);
+
+	/* we need to check high bit for unflagged overflows */
+	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
+		overflow = 1;
 		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			((u64)dummy) & ~P4_CCCR_OVF);
+			((u64)low) & ~P4_CCCR_OVF);
 	}
+
+	return overflow;
 }
 
 static inline void p4_pmu_disable_event(struct perf_event *event)
@@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
-		/*
-		 * FIXME: Redundant call, actually not needed
-		 * but just to check if we're screwed
-		 */
-		p4_pmu_clear_cccr_ovf(hwc);
+		/* it might be unflagged overflow */
+		handled = p4_pmu_clear_cccr_ovf(hwc);
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+		if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
 			continue;
 
-		/*
-		 * event overflow
-		 */
-		handled = 1;
-		data.period = event->hw.last_period;
+		/* event overflow for sure */
+		data.period = event->hw.last_period;
 
 		if (!x86_perf_event_set_period(event))
 			continue;
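
The val test above relies on counters being programmed with a negative value (minus the sampling period): the top bit of the counter value stays set until the counter wraps past zero, so a still-set top bit means no overflow yet, while a cleared top bit means an overflow happened even if the CCCR never raised its OVF flag. A tiny self-contained illustration of that test, assuming the P4's 40-bit counter width; names and values are illustrative only, not from the patch.

#include <stdio.h>
#include <stdint.h>

/* Editorial sketch, not kernel code: top-bit test for "counter has
 * wrapped", as used in the handler above with cntval_bits == 40. */
static int counter_wrapped(uint64_t val, int cntval_bits)
{
	return !(val & (UINT64_C(1) << (cntval_bits - 1)));
}

int main(void)
{
	uint64_t period = 100000;
	uint64_t mask = (UINT64_C(1) << 40) - 1;
	uint64_t start = (UINT64_C(1) << 40) - period;	/* -period in 40 bits */

	printf("just programmed: wrapped=%d\n", counter_wrapped(start, 40));
	printf("after period+1 : wrapped=%d\n",
	       counter_wrapped((start + period + 1) & mask, 40));
	return 0;
}
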
@@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
 
 /*
  * ESCR address hashing is tricky, ESCRs are not sequential
- * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and
+ * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and
  * the metric between any ESCRs is laid in range [0xa0,0xe1]
  *
  * so we make ~70% filled hashtable
@@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr)
 {
 	unsigned int idx = P4_ESCR_MSR_IDX(addr);
 
 	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
-			!p4_escr_table[idx])) {
+			!p4_escr_table[idx] ||
+			p4_escr_table[idx] != addr)) {
 		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
 		return -1;
 	}
@@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
-	int cpu = raw_smp_processor_id();
+	int cpu = smp_processor_id();
 	struct hw_perf_event *hwc;
 	struct p4_event_bind *bind;
 	unsigned int i, thread, num;
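
The p4_get_escr_idx() hunk above tightens the sanity check: a hash of an MSR address is only trustworthy if the table slot it lands in actually records that address, otherwise an unrelated address that happens to collide with a populated slot would be accepted silently. A small self-contained illustration of that lookup-with-verification pattern follows; the table, the trivial modulo hash and the function names are made up for the example and are not the kernel's. It deliberately reuses 0x3e0, the stale address that the comment fix earlier in this file corrects to 0x3a0.

#include <stdio.h>

#define ESCR_TABLE_SIZE 64u

/* Illustrative table: slot -> ESCR MSR address it stands for (0 = empty). */
static unsigned int escr_table[ESCR_TABLE_SIZE];

/* Toy hash standing in for P4_ESCR_MSR_IDX(); collisions are possible. */
static unsigned int escr_hash(unsigned int addr)
{
	return addr % ESCR_TABLE_SIZE;
}

/* Accept an index only if the range check, the occupancy check and the
 * reverse mapping all agree; the reverse check is what the hunk adds. */
static int escr_index(unsigned int addr)
{
	unsigned int idx = escr_hash(addr);

	if (idx >= ESCR_TABLE_SIZE || !escr_table[idx] ||
	    escr_table[idx] != addr)
		return -1;
	return (int)idx;
}

int main(void)
{
	escr_table[escr_hash(0x3a0)] = 0x3a0;	/* register one known MSR */

	printf("0x3a0 -> %d\n", escr_index(0x3a0));	/* accepted */
	printf("0x3e0 -> %d\n", escr_index(0x3e0));	/* collides, rejected */
	return 0;
}
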
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index df3d5c861cda..308e32570d84 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -34,7 +34,7 @@
 /* IA32 Manual 3, 2-1 */
 static unsigned char prefix_codes[] = {
 	0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
-	0x65, 0x2E, 0x3E, 0x66, 0x67
+	0x65, 0x66, 0x67
 };
 /* IA32 Manual 3, 3-432*/
 static unsigned int reg_rop[] = {
