diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sparc/kernel/perf_event.c | 108 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event_p4.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 41 |
3 files changed, 84 insertions, 68 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index e2771939341d..cf4ce263ff81 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -91,6 +91,8 @@ struct cpu_hw_events { | |||
91 | 91 | ||
92 | /* Enabled/disable state. */ | 92 | /* Enabled/disable state. */ |
93 | int enabled; | 93 | int enabled; |
94 | |||
95 | unsigned int group_flag; | ||
94 | }; | 96 | }; |
95 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; | 97 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; |
96 | 98 | ||
@@ -980,53 +982,6 @@ static int collect_events(struct perf_event *group, int max_count, | |||
980 | return n; | 982 | return n; |
981 | } | 983 | } |
982 | 984 | ||
983 | static void event_sched_in(struct perf_event *event) | ||
984 | { | ||
985 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
986 | event->oncpu = smp_processor_id(); | ||
987 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
988 | if (is_software_event(event)) | ||
989 | event->pmu->enable(event); | ||
990 | } | ||
991 | |||
992 | int hw_perf_group_sched_in(struct perf_event *group_leader, | ||
993 | struct perf_cpu_context *cpuctx, | ||
994 | struct perf_event_context *ctx) | ||
995 | { | ||
996 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
997 | struct perf_event *sub; | ||
998 | int n0, n; | ||
999 | |||
1000 | if (!sparc_pmu) | ||
1001 | return 0; | ||
1002 | |||
1003 | n0 = cpuc->n_events; | ||
1004 | n = collect_events(group_leader, perf_max_events - n0, | ||
1005 | &cpuc->event[n0], &cpuc->events[n0], | ||
1006 | &cpuc->current_idx[n0]); | ||
1007 | if (n < 0) | ||
1008 | return -EAGAIN; | ||
1009 | if (check_excludes(cpuc->event, n0, n)) | ||
1010 | return -EINVAL; | ||
1011 | if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0)) | ||
1012 | return -EAGAIN; | ||
1013 | cpuc->n_events = n0 + n; | ||
1014 | cpuc->n_added += n; | ||
1015 | |||
1016 | cpuctx->active_oncpu += n; | ||
1017 | n = 1; | ||
1018 | event_sched_in(group_leader); | ||
1019 | list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { | ||
1020 | if (sub->state != PERF_EVENT_STATE_OFF) { | ||
1021 | event_sched_in(sub); | ||
1022 | n++; | ||
1023 | } | ||
1024 | } | ||
1025 | ctx->nr_active += n; | ||
1026 | |||
1027 | return 1; | ||
1028 | } | ||
1029 | |||
1030 | static int sparc_pmu_enable(struct perf_event *event) | 985 | static int sparc_pmu_enable(struct perf_event *event) |
1031 | { | 986 | { |
1032 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 987 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1044,11 +999,20 @@ static int sparc_pmu_enable(struct perf_event *event) | |||
1044 | cpuc->events[n0] = event->hw.event_base; | 999 | cpuc->events[n0] = event->hw.event_base; |
1045 | cpuc->current_idx[n0] = PIC_NO_INDEX; | 1000 | cpuc->current_idx[n0] = PIC_NO_INDEX; |
1046 | 1001 | ||
1002 | /* | ||
1003 | * If group events scheduling transaction was started, | ||
1004 | * skip the schedulability test here, it will be performed | ||
1005 | * at commit time(->commit_txn) as a whole | ||
1006 | */ | ||
1007 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
1008 | goto nocheck; | ||
1009 | |||
1047 | if (check_excludes(cpuc->event, n0, 1)) | 1010 | if (check_excludes(cpuc->event, n0, 1)) |
1048 | goto out; | 1011 | goto out; |
1049 | if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1)) | 1012 | if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1)) |
1050 | goto out; | 1013 | goto out; |
1051 | 1014 | ||
1015 | nocheck: | ||
1052 | cpuc->n_events++; | 1016 | cpuc->n_events++; |
1053 | cpuc->n_added++; | 1017 | cpuc->n_added++; |
1054 | 1018 | ||
@@ -1128,11 +1092,61 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1128 | return 0; | 1092 | return 0; |
1129 | } | 1093 | } |
1130 | 1094 | ||
1095 | /* | ||
1096 | * Start group events scheduling transaction | ||
1097 | * Set the flag to make pmu::enable() not perform the | ||
1098 | * schedulability test, it will be performed at commit time | ||
1099 | */ | ||
1100 | static void sparc_pmu_start_txn(const struct pmu *pmu) | ||
1101 | { | ||
1102 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1103 | |||
1104 | cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | * Stop group events scheduling transaction | ||
1109 | * Clear the flag and pmu::enable() will perform the | ||
1110 | * schedulability test. | ||
1111 | */ | ||
1112 | static void sparc_pmu_cancel_txn(const struct pmu *pmu) | ||
1113 | { | ||
1114 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1115 | |||
1116 | cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
1117 | } | ||
1118 | |||
1119 | /* | ||
1120 | * Commit group events scheduling transaction | ||
1121 | * Perform the group schedulability test as a whole | ||
1122 | * Return 0 if success | ||
1123 | */ | ||
1124 | static int sparc_pmu_commit_txn(const struct pmu *pmu) | ||
1125 | { | ||
1126 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1127 | int n; | ||
1128 | |||
1129 | if (!sparc_pmu) | ||
1130 | return -EINVAL; | ||
1131 | |||
1132 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1133 | n = cpuc->n_events; | ||
1134 | if (check_excludes(cpuc->event, 0, n)) | ||
1135 | return -EINVAL; | ||
1136 | if (sparc_check_constraints(cpuc->event, cpuc->events, n)) | ||
1137 | return -EAGAIN; | ||
1138 | |||
1139 | return 0; | ||
1140 | } | ||
1141 | |||
1131 | static const struct pmu pmu = { | 1142 | static const struct pmu pmu = { |
1132 | .enable = sparc_pmu_enable, | 1143 | .enable = sparc_pmu_enable, |
1133 | .disable = sparc_pmu_disable, | 1144 | .disable = sparc_pmu_disable, |
1134 | .read = sparc_pmu_read, | 1145 | .read = sparc_pmu_read, |
1135 | .unthrottle = sparc_pmu_unthrottle, | 1146 | .unthrottle = sparc_pmu_unthrottle, |
1147 | .start_txn = sparc_pmu_start_txn, | ||
1148 | .cancel_txn = sparc_pmu_cancel_txn, | ||
1149 | .commit_txn = sparc_pmu_commit_txn, | ||
1136 | }; | 1150 | }; |
1137 | 1151 | ||
1138 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1152 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b05400a542ff..64a8ebff06fc 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -89,7 +89,8 @@ | |||
89 | P4_CCCR_ENABLE) | 89 | P4_CCCR_ENABLE) |
90 | 90 | ||
91 | /* HT mask */ | 91 | /* HT mask */ |
92 | #define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) | 92 | #define P4_CCCR_MASK_HT \ |
93 | (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) | ||
93 | 94 | ||
94 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | 95 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ |
95 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | 96 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 424fc8de68e4..ae85d69644d1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -465,15 +465,21 @@ out: | |||
465 | return rc; | 465 | return rc; |
466 | } | 466 | } |
467 | 467 | ||
468 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | 468 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) |
469 | { | 469 | { |
470 | unsigned long dummy; | 470 | int overflow = 0; |
471 | u32 low, high; | ||
471 | 472 | ||
472 | rdmsrl(hwc->config_base + hwc->idx, dummy); | 473 | rdmsr(hwc->config_base + hwc->idx, low, high); |
473 | if (dummy & P4_CCCR_OVF) { | 474 | |
475 | /* we need to check high bit for unflagged overflows */ | ||
476 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
477 | overflow = 1; | ||
474 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 478 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
475 | ((u64)dummy) & ~P4_CCCR_OVF); | 479 | ((u64)low) & ~P4_CCCR_OVF); |
476 | } | 480 | } |
481 | |||
482 | return overflow; | ||
477 | } | 483 | } |
478 | 484 | ||
479 | static inline void p4_pmu_disable_event(struct perf_event *event) | 485 | static inline void p4_pmu_disable_event(struct perf_event *event) |
@@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
584 | 590 | ||
585 | WARN_ON_ONCE(hwc->idx != idx); | 591 | WARN_ON_ONCE(hwc->idx != idx); |
586 | 592 | ||
587 | /* | 593 | /* it might be unflagged overflow */ |
588 | * FIXME: Redundant call, actually not needed | 594 | handled = p4_pmu_clear_cccr_ovf(hwc); |
589 | * but just to check if we're screwed | ||
590 | */ | ||
591 | p4_pmu_clear_cccr_ovf(hwc); | ||
592 | 595 | ||
593 | val = x86_perf_event_update(event); | 596 | val = x86_perf_event_update(event); |
594 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | 597 | if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) |
595 | continue; | 598 | continue; |
596 | 599 | ||
597 | /* | 600 | /* event overflow for sure */ |
598 | * event overflow | 601 | data.period = event->hw.last_period; |
599 | */ | ||
600 | handled = 1; | ||
601 | data.period = event->hw.last_period; | ||
602 | 602 | ||
603 | if (!x86_perf_event_set_period(event)) | 603 | if (!x86_perf_event_set_period(event)) |
604 | continue; | 604 | continue; |
@@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | |||
670 | 670 | ||
671 | /* | 671 | /* |
672 | * ESCR address hashing is tricky, ESCRs are not sequential | 672 | * ESCR address hashing is tricky, ESCRs are not sequential |
673 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and | 673 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and |
674 | * the metric between any ESCRs is laid in range [0xa0,0xe1] | 674 | * the metric between any ESCRs is laid in range [0xa0,0xe1] |
675 | * | 675 | * |
676 | * so we make ~70% filled hashtable | 676 | * so we make ~70% filled hashtable |
@@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr) | |||
735 | { | 735 | { |
736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | 736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); |
737 | 737 | ||
738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | 738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || |
739 | !p4_escr_table[idx])) { | 739 | !p4_escr_table[idx] || |
740 | p4_escr_table[idx] != addr)) { | ||
740 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | 741 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); |
741 | return -1; | 742 | return -1; |
742 | } | 743 | } |
@@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
762 | { | 763 | { |
763 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 764 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
764 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | 765 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; |
765 | int cpu = raw_smp_processor_id(); | 766 | int cpu = smp_processor_id(); |
766 | struct hw_perf_event *hwc; | 767 | struct hw_perf_event *hwc; |
767 | struct p4_event_bind *bind; | 768 | struct p4_event_bind *bind; |
768 | unsigned int i, thread, num; | 769 | unsigned int i, thread, num; |