Diffstat (limited to 'arch')
45 files changed, 432 insertions(+), 302 deletions(-)
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index de12536d687f..417c392ddf1c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
 	if (left > (s64)armpmu->max_period)
 		left = armpmu->max_period;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
 	u64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = armpmu->read_counter(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
 		hwc->last_period    = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	err = 0;
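The conversions above all follow one pattern: hw_perf_event::prev_count and ::period_left are only ever touched from the CPU that owns the event (possibly from NMI context), so the cheaper local64_t ops can stand in for atomic64_t. A minimal sketch of that read/cmpxchg/accumulate loop, assuming only the <asm-generic/local64.h> API that the new stub headers pull in (read_hw_counter is a stand-in for the PMU read hook):

	/* sketch: fold the hardware counter's progress into the event count;
	 * safe against an NMI on the same CPU racing with this update */
	static u64 update_count(local64_t *prev_count, local64_t *count,
				u64 (*read_hw_counter)(void))
	{
		u64 prev, now;

		do {
			prev = local64_read(prev_count);
			now  = read_hw_counter();
		} while (local64_cmpxchg(prev_count, prev, now) != prev);

		local64_add(now - prev, count);
		return now;
	}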
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b126..5c16b891d501 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
 #include <asm/perf_event_fsl_emb.h>
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip)			\
+	do {							\
+		(regs)->nip = __ip;				\
+		(regs)->gpr[1] = *(unsigned long *)__get_SP();	\
+		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
+	} while (0)
+#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a556..2d29752cbe16 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
 _GLOBAL(__restore_cpu_power7)
 	/* place holder */
 	blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register).  These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
-	mr	r6,r1
-	cmpwi	r5,0
-	mflr	r4
-	ble	2f
-	mtctr	r5
-1:	PPC_LL	r6,0(r6)
-	bdnz	1b
-	PPC_LL	r4,PPC_LR_STKOFF(r6)
-2:	PPC_LL	r7,0(r6)
-	PPC_LL	r7,PPC_LR_STKOFF(r7)
-	PPC_STL	r6,GPR1-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r4,_NIP-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r7,_LINK-STACK_FRAME_OVERHEAD(r3)
-	blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe51258..d301a30445e0 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
 	 * Therefore we treat them like NMIs.
 	 */
 	do {
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
-	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
 	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &event->hw.period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 		if (!event->hw.idx)
 			continue;
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		prev = atomic64_read(&event->hw.prev_count);
+		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
 		delta = (val - prev) & 0xfffffffful;
-		atomic64_add(delta, &event->count);
+		local64_add(delta, &event->count);
 	}
 }
 
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
 		}
 		val = 0;
 		if (event->hw.sample_period) {
-			left = atomic64_read(&event->hw.period_left);
+			left = local64_read(&event->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
 		}
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuhw->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 	perf_enable();
 	local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
 	event->hw.last_period = event->hw.sample_period;
-	atomic64_set(&event->hw.period_left, event->hw.last_period);
+	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	int record = 0;
 
 	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&event->hw.prev_count);
+	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 
 	/*
 	 * See if the total period for this event has expired,
 	 * and update for the next period.
 	 */
 	val = 0;
-	left = atomic64_read(&event->hw.period_left) - delta;
+	left = local64_read(&event->hw.period_left) - delta;
 	if (period) {
 		if (left <= 0) {
 			left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	}
 
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 }
 
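The rename of PERF_EVENT_TXN_STARTED to PERF_EVENT_TXN and the new clear in ->commit_txn belong to one scheme: while a group transaction is open, ->enable skips the per-event schedulability check and the whole group is validated once at commit. A rough sketch of the sequence the core implies (the group loop is illustrative, not a real kernel macro):

	/* sketch of the transaction flow around group scheduling */
	pmu->start_txn(pmu);		/* sets cpuhw->group_flag |= PERF_EVENT_TXN  */
	/* for each event in the group: */
	pmu->enable(event);		/* sees TXN set, defers the schedulability test */
	if (pmu->commit_txn(pmu))	/* one test for the whole group; clears TXN on success */
		pmu->cancel_txn(pmu);	/* roll back and clear TXN if the group does not fit */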
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5d..7a3dc3567258 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
 	 * this is the simplest approach for maintaining consistency.
 	 */
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = sh_pmu->read(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -203,7 +203,7 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 }
 
 static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce8..74c4e0cd889c 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
 #define	PERF_EVENT_INDEX_OFFSET	0
 
 #ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
 extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+#define perf_arch_fetch_caller_regs(pt_regs, ip)	\
+	__perf_arch_fetch_caller_regs(pt_regs, ip, 1);
 #else
 static inline void init_hw_perf_events(void)	{ }
 #endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e829..682fee06a16b 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
 	.size		stack_trace_flush,.-stack_trace_flush
 
 #ifdef CONFIG_PERF_EVENTS
-	.globl		perf_arch_fetch_caller_regs
-	.type		perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+	.globl		__perf_arch_fetch_caller_regs
+	.type		__perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
 	/* We always read the %pstate into %o5 since we will use
 	 * that to construct a fake %tstate to store into the regs.
 	 */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 44faabc3c02c..357ced3c33ff 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
 	s64 delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = read_pmc(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -591,27 +591,27 @@ again:
 static int sparc_perf_event_set_period(struct perf_event *event,
 				       struct hw_perf_event *hwc, int idx)
 {
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 	if (left > MAX_PERIOD)
 		left = MAX_PERIOD;
 
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
 	write_pmc(idx, (u64)(-left) & 0xffffffff);
 
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = MAX_PERIOD;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
 		return -EAGAIN;
 
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
 
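As on the other architectures, sparc_perf_event_set_period() arms the counter with the two's complement of the remaining period, so the hardware overflows (and interrupts) after exactly 'left' more events. A small worked example, assuming the 32-bit counter width used by write_pmc() above:

	s64 left = 10000;			/* events left in the sample period */
	u32 armed = (u64)(-left) & 0xffffffff;	/* 0xffffd8f0 = 2^32 - 10000 */
	/* the counter increments from 0xffffd8f0, wraps to 0 after 10000 events,
	 * and the resulting overflow interrupt triggers the next sample */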
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..528a11e8d3e3 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
 #include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X		0x00
 #define X86_BREAKPOINT_LEN_1		0x40
 #define X86_BREAKPOINT_LEN_2		0x44
 #define X86_BREAKPOINT_LEN_4		0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE	0x40
 
 #ifdef CONFIG_X86_64
 #define X86_BREAKPOINT_LEN_8		0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_counters_fixed:4;
-		unsigned int reserved:28;
+		unsigned int num_counters_fixed:5;
+		unsigned int bit_width_fixed:8;
+		unsigned int reserved:19;
 	} split;
 	unsigned int full;
 };
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)		{	\
+	(regs)->ip = (__ip);					\
+	(regs)->bp = caller_frame_pointer();			\
+	(regs)->cs = __KERNEL_CS;				\
+	regs->flags = 0;					\
+}
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
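The widened cpuid10_edx layout matches CPUID leaf 0xA: EDX bits 0-4 report the number of fixed-function counters and bits 5-12 their bit width. A hedged user-space sketch of decoding those fields (using GCC's __get_cpuid helper, outside the kernel):

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx))
			return 1;			/* leaf 0xA not supported */

		printf("fixed counters: %u, width: %u bits\n",
		       edx & 0x1f,			/* num_counters_fixed, bits 0-4 */
		       (edx >> 5) & 0xff);		/* bit_width_fixed, bits 5-12 */
		return 0;
	}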
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
| @@ -19,7 +19,6 @@ | |||
| 19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | 19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ |
| 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) |
| 21 | #define ARCH_P4_MAX_CCCR (18) | 21 | #define ARCH_P4_MAX_CCCR (18) |
| 22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
| 23 | 22 | ||
| 24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | 23 | #define P4_ESCR_EVENT_MASK 0x7e000000U |
| 25 | #define P4_ESCR_EVENT_SHIFT 25 | 24 | #define P4_ESCR_EVENT_SHIFT 25 |
| @@ -71,10 +70,6 @@ | |||
| 71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | 70 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) |
| 72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | 71 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) |
| 73 | 72 | ||
| 74 | /* Custom bits in reerved CCCR area */ | ||
| 75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
| 76 | |||
| 77 | |||
| 78 | /* Non HT mask */ | 73 | /* Non HT mask */ |
| 79 | #define P4_CCCR_MASK \ | 74 | #define P4_CCCR_MASK \ |
| 80 | (P4_CCCR_OVF | \ | 75 | (P4_CCCR_OVF | \ |
| @@ -106,8 +101,7 @@ | |||
| 106 | * ESCR and CCCR but rather an only packed value should | 101 | * ESCR and CCCR but rather an only packed value should |
| 107 | * be unpacked and written to a proper addresses | 102 | * be unpacked and written to a proper addresses |
| 108 | * | 103 | * |
| 109 | * the base idea is to pack as much info as | 104 | * the base idea is to pack as much info as possible |
| 110 | * possible | ||
| 111 | */ | 105 | */ |
| 112 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | 106 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) |
| 113 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | 107 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) |
| @@ -130,8 +124,6 @@ | |||
| 130 | t; \ | 124 | t; \ |
| 131 | }) | 125 | }) |
| 132 | 126 | ||
| 133 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
| 134 | |||
| 135 | #define P4_CONFIG_HT_SHIFT 63 | 127 | #define P4_CONFIG_HT_SHIFT 63 |
| 136 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | 128 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) |
| 137 | 129 | ||
| @@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | |||
| 214 | return escr; | 206 | return escr; |
| 215 | } | 207 | } |
| 216 | 208 | ||
| 209 | /* | ||
| 210 | * This are the events which should be used in "Event Select" | ||
| 211 | * field of ESCR register, they are like unique keys which allow | ||
| 212 | * the kernel to determinate which CCCR and COUNTER should be | ||
| 213 | * used to track an event | ||
| 214 | */ | ||
| 217 | enum P4_EVENTS { | 215 | enum P4_EVENTS { |
| 218 | P4_EVENT_TC_DELIVER_MODE, | 216 | P4_EVENT_TC_DELIVER_MODE, |
| 219 | P4_EVENT_BPU_FETCH_REQUEST, | 217 | P4_EVENT_BPU_FETCH_REQUEST, |
| @@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES { | |||
| 561 | * a caller should use P4_ESCR_EMASK_NAME helper to | 559 | * a caller should use P4_ESCR_EMASK_NAME helper to |
| 562 | * pick the EventMask needed, for example | 560 | * pick the EventMask needed, for example |
| 563 | * | 561 | * |
| 564 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | 562 | * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
| 565 | */ | 563 | */ |
| 566 | enum P4_ESCR_EMASKS { | 564 | enum P4_ESCR_EMASKS { |
| 567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | 565 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), |
| @@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS { | |||
| 753 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | 751 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), |
| 754 | }; | 752 | }; |
| 755 | 753 | ||
| 756 | /* P4 PEBS: stale for a while */ | 754 | /* |
| 757 | #define P4_PEBS_METRIC_MASK 0x00001fffU | 755 | * P4 PEBS specifics (Replay Event only) |
| 758 | #define P4_PEBS_UOB_TAG 0x01000000U | 756 | * |
| 759 | #define P4_PEBS_ENABLE 0x02000000U | 757 | * Format (bits): |
| 760 | 758 | * 0-6: metric from P4_PEBS_METRIC enum | |
| 761 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | 759 | * 7 : reserved |
| 762 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | 760 | * 8 : reserved |
| 763 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | 761 | * 9-11 : reserved |
| 764 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | 762 | * |
| 765 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | 763 | * Note we have UOP and PEBS bits reserved for now |
| 766 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | 764 | * just in case if we will need them once |
| 767 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | 765 | */ |
| 768 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | 766 | #define P4_PEBS_CONFIG_ENABLE (1 << 7) |
| 769 | #define P4_PEBS__split_load_retired 0x3000400 | 767 | #define P4_PEBS_CONFIG_UOP_TAG (1 << 8) |
| 770 | #define P4_PEBS__split_store_retired 0x3000400 | 768 | #define P4_PEBS_CONFIG_METRIC_MASK 0x3f |
| 771 | 769 | #define P4_PEBS_CONFIG_MASK 0xff | |
| 772 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | 770 | |
| 773 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | 771 | /* |
| 774 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | 772 | * mem: Only counters MSR_IQ_COUNTER4 (16) and |
| 775 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | 773 | * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling |
| 776 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | 774 | */ |
| 777 | #define P4_VERT__tagged_mispred_branch 0x0000010 | 775 | #define P4_PEBS_ENABLE 0x02000000U |
| 778 | #define P4_VERT__mob_load_replay_retired 0x0000001 | 776 | #define P4_PEBS_ENABLE_UOP_TAG 0x01000000U |
| 779 | #define P4_VERT__split_load_retired 0x0000001 | 777 | |
| 780 | #define P4_VERT__split_store_retired 0x0000002 | 778 | #define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) |
| 781 | 779 | #define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) | |
| 782 | enum P4_CACHE_EVENTS { | 780 | |
| 783 | P4_CACHE__NONE, | 781 | #define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask)) |
| 784 | 782 | ||
| 785 | P4_CACHE__1stl_cache_load_miss_retired, | 783 | enum P4_PEBS_METRIC { |
| 786 | P4_CACHE__2ndl_cache_load_miss_retired, | 784 | P4_PEBS_METRIC__none, |
| 787 | P4_CACHE__dtlb_load_miss_retired, | 785 | |
| 788 | P4_CACHE__dtlb_store_miss_retired, | 786 | P4_PEBS_METRIC__1stl_cache_load_miss_retired, |
| 789 | P4_CACHE__itlb_reference_hit, | 787 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired, |
| 790 | P4_CACHE__itlb_reference_miss, | 788 | P4_PEBS_METRIC__dtlb_load_miss_retired, |
| 791 | 789 | P4_PEBS_METRIC__dtlb_store_miss_retired, | |
| 792 | P4_CACHE__MAX | 790 | P4_PEBS_METRIC__dtlb_all_miss_retired, |
| 791 | P4_PEBS_METRIC__tagged_mispred_branch, | ||
| 792 | P4_PEBS_METRIC__mob_load_replay_retired, | ||
| 793 | P4_PEBS_METRIC__split_load_retired, | ||
| 794 | P4_PEBS_METRIC__split_store_retired, | ||
| 795 | |||
| 796 | P4_PEBS_METRIC__max | ||
| 793 | }; | 797 | }; |
| 794 | 798 | ||
| 795 | #endif /* PERF_EVENT_P4_H */ | 799 | #endif /* PERF_EVENT_P4_H */ |
| 800 | |||
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
 #ifndef _ASM_X86_STACKTRACE_H
 #define _ASM_X86_STACKTRACE_H
 
+#include <linux/uaccess.h>
+
 extern int kstack_depth_to_print;
 
 struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+	struct stack_frame *next_frame;
+	unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+	u32 next_frame;
+	u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+	struct stack_frame *frame;
+
+	get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+	frame = frame->next_frame;
+#endif
+
+	return (unsigned long)frame;
+}
+
 #endif /* _ASM_X86_STACKTRACE_H */
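caller_frame_pointer() just snapshots %rbp/%ebp (skipping one frame when frame pointers are built in); the callchain code then follows the struct stack_frame list declared above. A simplified sketch of that walk, assuming frame pointers are enabled and ignoring the validity checks a real unwinder performs:

	static void walk_frames(struct stack_frame *frame,
				void (*record)(unsigned long addr))
	{
		while (frame) {
			record(frame->return_address);	/* caller's return address */
			frame = frame->next_frame;	/* saved frame pointer chain */
		}
	}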
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
 				 struct perf_event *event);
 	struct event_constraint *event_constraints;
 	void		(*quirks)(void);
+	int		perfctr_second_write;
 
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update
 	 * count to the generic event atomically:
 	 */
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	rdmsrl(hwc->event_base + idx, new_raw_count);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -313,8 +314,8 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	} else {
 		/*
 		 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	s64 left = atomic64_read(&hwc->period_left);
+	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0, idx = hwc->idx;
 
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
 	 * The hw event starts counting from this event offset,
 	 * mark it to be able to extra future deltas:
 	 */
-	atomic64_set(&hwc->prev_count, (u64)-left);
+	local64_set(&hwc->prev_count, (u64)-left);
 
-	wrmsrl(hwc->event_base + idx,
-			(u64)(-left) & x86_pmu.cntval_mask);
+	wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+	/*
+	 * Due to erratum on certan cpu we need
+	 * a second write to be sure the register
+	 * is updated properly
+	 */
+	if (x86_pmu.perfctr_second_write) {
+		wrmsrl(hwc->event_base + idx,
+			(u64)(-left) & x86_pmu.cntval_mask);
+	}
 
 	perf_event_update_userpage(event);
 
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be peformed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto out;
 
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuc->group_flag & PERF_EVENT_TXN)
 		return;
 
 	x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag |= PERF_EVENT_TXN;
 	cpuc->n_txn = 0;
 }
 
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 	/*
 	 * Truncate the collected events.
 	 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-	/*
-	 * Clear out the txn count so that ->cancel_txn() which gets
-	 * run after ->commit_txn() doesn't undo things.
-	 */
-	cpuc->n_txn = 0;
+	cpuc->group_flag &= ~PERF_EVENT_TXN;
 
 	return 0;
 }
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
 	.walk_stack		= print_context_stack_bp,
 };
 
-#include "../dumpstack.h"
-
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-	regs->ip = ip;
-	/*
-	 * perf_arch_fetch_caller_regs adds another call, we need to increment
-	 * the skip level
-	 */
-	regs->bp = rewind_frame_pointer(skip + 1);
-	regs->cs = __KERNEL_CS;
-	/*
-	 * We abuse bit 3 to pass exact information, see perf_misc_flags
-	 * and the comment with PERF_EFLAGS_EXACT.
-	 */
-	regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 	unsigned long ip;
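With the out-of-line perf_arch_fetch_caller_regs() removed here, each architecture now supplies it as a macro or inline that snapshots just enough register state at the call site, without taking a trap. A hedged illustration of the calling pattern (the surrounding function is invented for the example):

	static void sample_from_here(void)
	{
		struct pt_regs regs;

		perf_arch_fetch_caller_regs(&regs, _THIS_IP_);
		/* ... hand &regs to the perf core for a software-event sample ... */
	}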
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..107711bf0ee8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
| @@ -21,22 +21,36 @@ struct p4_event_bind { | |||
| 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ | 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ |
| 22 | }; | 22 | }; |
| 23 | 23 | ||
| 24 | struct p4_cache_event_bind { | 24 | struct p4_pebs_bind { |
| 25 | unsigned int metric_pebs; | 25 | unsigned int metric_pebs; |
| 26 | unsigned int metric_vert; | 26 | unsigned int metric_vert; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | 29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ |
| 30 | [P4_CACHE__##name] = { \ | 30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ |
| 31 | .metric_pebs = P4_PEBS__##name, \ | 31 | [P4_PEBS_METRIC__##name] = { \ |
| 32 | .metric_vert = P4_VERT__##name, \ | 32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ |
| 33 | .metric_vert = vert, \ | ||
| 33 | } | 34 | } |
| 34 | 35 | ||
| 35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | 36 | /* |
| 36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | 37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here |
| 37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | 38 | * |
| 38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | 39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of |
| 39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | 40 | * event configuration to find out which values are to be |
| 41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
| 42 | * resgisters | ||
| 43 | */ | ||
| 44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
| 45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
| 46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
| 47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
| 48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
| 49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
| 50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
| 51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
| 52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
| 53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
| 40 | }; | 54 | }; |
| 41 | 55 | ||
| 42 | /* | 56 | /* |
| @@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
| 281 | }, | 295 | }, |
| 282 | }; | 296 | }; |
| 283 | 297 | ||
| 284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | 298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ |
| 285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | 299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ |
| 286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | 300 | P4_ESCR_EMASK_BIT(event, bit)) | \ |
| 287 | p4_config_pack_cccr(cache_event | \ | 301 | p4_config_pack_cccr(metric | \ |
| 288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | 302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) |
| 289 | 303 | ||
| 290 | static __initconst const u64 p4_hw_cache_event_ids | 304 | static __initconst const u64 p4_hw_cache_event_ids |
| @@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
| 296 | [ C(OP_READ) ] = { | 310 | [ C(OP_READ) ] = { |
| 297 | [ C(RESULT_ACCESS) ] = 0x0, | 311 | [ C(RESULT_ACCESS) ] = 0x0, |
| 298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 299 | P4_CACHE__1stl_cache_load_miss_retired), | 313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
| 300 | }, | 314 | }, |
| 301 | }, | 315 | }, |
| 302 | [ C(LL ) ] = { | 316 | [ C(LL ) ] = { |
| 303 | [ C(OP_READ) ] = { | 317 | [ C(OP_READ) ] = { |
| 304 | [ C(RESULT_ACCESS) ] = 0x0, | 318 | [ C(RESULT_ACCESS) ] = 0x0, |
| 305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 306 | P4_CACHE__2ndl_cache_load_miss_retired), | 320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
| 307 | }, | 321 | }, |
| 308 | }, | 322 | }, |
| 309 | [ C(DTLB) ] = { | 323 | [ C(DTLB) ] = { |
| 310 | [ C(OP_READ) ] = { | 324 | [ C(OP_READ) ] = { |
| 311 | [ C(RESULT_ACCESS) ] = 0x0, | 325 | [ C(RESULT_ACCESS) ] = 0x0, |
| 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 313 | P4_CACHE__dtlb_load_miss_retired), | 327 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
| 314 | }, | 328 | }, |
| 315 | [ C(OP_WRITE) ] = { | 329 | [ C(OP_WRITE) ] = { |
| 316 | [ C(RESULT_ACCESS) ] = 0x0, | 330 | [ C(RESULT_ACCESS) ] = 0x0, |
| 317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | 331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
| 318 | P4_CACHE__dtlb_store_miss_retired), | 332 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
| 319 | }, | 333 | }, |
| 320 | }, | 334 | }, |
| 321 | [ C(ITLB) ] = { | 335 | [ C(ITLB) ] = { |
| 322 | [ C(OP_READ) ] = { | 336 | [ C(OP_READ) ] = { |
| 323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | 337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, |
| 324 | P4_CACHE__itlb_reference_hit), | 338 | P4_PEBS_METRIC__none), |
| 325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | 339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
| 326 | P4_CACHE__itlb_reference_miss), | 340 | P4_PEBS_METRIC__none), |
| 327 | }, | 341 | }, |
| 328 | [ C(OP_WRITE) ] = { | 342 | [ C(OP_WRITE) ] = { |
| 329 | [ C(RESULT_ACCESS) ] = -1, | 343 | [ C(RESULT_ACCESS) ] = -1, |
| @@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event) | |||
| 414 | return config; | 428 | return config; |
| 415 | } | 429 | } |
| 416 | 430 | ||
| 431 | static int p4_validate_raw_event(struct perf_event *event) | ||
| 432 | { | ||
| 433 | unsigned int v; | ||
| 434 | |||
| 435 | 	/* user data may have an out-of-bounds event index */ | ||
| 436 | v = p4_config_unpack_event(event->attr.config); | ||
| 437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
| 439 | return -EINVAL; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | 	 * it may have some bogus PEBS bits | ||
| 444 | */ | ||
| 445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
| 446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
| 447 | return -EINVAL; | ||
| 448 | } | ||
| 449 | v = p4_config_unpack_metric(event->attr.config); | ||
| 450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
| 451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
| 452 | return -EINVAL; | ||
| 453 | } | ||
| 454 | |||
| 455 | return 0; | ||
| 456 | } | ||
| 457 | |||
| 417 | static int p4_hw_config(struct perf_event *event) | 458 | static int p4_hw_config(struct perf_event *event) |
| 418 | { | 459 | { |
| 419 | int cpu = get_cpu(); | 460 | int cpu = get_cpu(); |
| 420 | int rc = 0; | 461 | int rc = 0; |
| 421 | unsigned int evnt; | ||
| 422 | u32 escr, cccr; | 462 | u32 escr, cccr; |
| 423 | 463 | ||
| 424 | /* | 464 | /* |
| @@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event) | |||
| 438 | 478 | ||
| 439 | if (event->attr.type == PERF_TYPE_RAW) { | 479 | if (event->attr.type == PERF_TYPE_RAW) { |
| 440 | 480 | ||
| 441 | /* user data may have out-of-bound event index */ | 481 | rc = p4_validate_raw_event(event); |
| 442 | evnt = p4_config_unpack_event(event->attr.config); | 482 | if (rc) |
| 443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 444 | rc = -EINVAL; | ||
| 445 | goto out; | 483 | goto out; |
| 446 | } | ||
| 447 | 484 | ||
| 448 | /* | 485 | /* |
| 449 | * We don't control raw events so it's up to the caller | 486 | * We don't control raw events so it's up to the caller |
| @@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event) | |||
| 451 | * on HT machine but allow HT-compatible specifics to be | 488 | * on HT machine but allow HT-compatible specifics to be |
| 452 | * passed on) | 489 | * passed on) |
| 453 | * | 490 | * |
| 491 | 	 * Note that for RAW events we allow the user to use P4_CCCR_RESERVED | ||
| 492 | 	 * bits since we keep additional info here (for cache events, etc.) | ||
| 493 | * | ||
| 454 | * XXX: HT wide things should check perf_paranoid_cpu() && | 494 | * XXX: HT wide things should check perf_paranoid_cpu() && |
| 455 | * CAP_SYS_ADMIN | 495 | * CAP_SYS_ADMIN |
| 456 | */ | 496 | */ |
| 457 | event->hw.config |= event->attr.config & | 497 | event->hw.config |= event->attr.config & |
| 458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | |
| 459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); |
| 460 | } | 500 | } |
| 461 | 501 | ||
| 462 | rc = x86_setup_perfctr(event); | 502 | rc = x86_setup_perfctr(event); |
| @@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |||
| 482 | return overflow; | 522 | return overflow; |
| 483 | } | 523 | } |
| 484 | 524 | ||
| 525 | static void p4_pmu_disable_pebs(void) | ||
| 526 | { | ||
| 527 | /* | ||
| 528 | * FIXME | ||
| 529 | * | ||
| 530 | 	 * It's still allowed that two threads set up the same cache | ||
| 531 | 	 * events, so we can't simply clear metrics until we know that | ||
| 532 | 	 * no one is depending on us; we need some kind of counter | ||
| 533 | 	 * for "ReplayEvent" users. | ||
| 534 | 	 * | ||
| 535 | 	 * What is more complex -- RAW events: if the user (for some | ||
| 536 | 	 * reason) passes a cache event metric with an improper | ||
| 537 | 	 * event opcode, it's fine from the hardware point of view | ||
| 538 | 	 * but complete nonsense as far as the "meaning" goes. | ||
| 539 | 	 * | ||
| 540 | 	 * So for the moment let's leave metrics turned on forever -- it's | ||
| 541 | 	 * ok for now but needs to be revisited! | ||
| 542 | * | ||
| 543 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
| 544 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
| 545 | */ | ||
| 546 | } | ||
| 547 | |||
| 485 | static inline void p4_pmu_disable_event(struct perf_event *event) | 548 | static inline void p4_pmu_disable_event(struct perf_event *event) |
| 486 | { | 549 | { |
| 487 | struct hw_perf_event *hwc = &event->hw; | 550 | struct hw_perf_event *hwc = &event->hw; |
| @@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void) | |||
| 507 | continue; | 570 | continue; |
| 508 | p4_pmu_disable_event(event); | 571 | p4_pmu_disable_event(event); |
| 509 | } | 572 | } |
| 573 | |||
| 574 | p4_pmu_disable_pebs(); | ||
| 575 | } | ||
| 576 | |||
| 577 | /* configuration must be valid */ | ||
| 578 | static void p4_pmu_enable_pebs(u64 config) | ||
| 579 | { | ||
| 580 | struct p4_pebs_bind *bind; | ||
| 581 | unsigned int idx; | ||
| 582 | |||
| 583 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
| 584 | |||
| 585 | idx = p4_config_unpack_metric(config); | ||
| 586 | if (idx == P4_PEBS_METRIC__none) | ||
| 587 | return; | ||
| 588 | |||
| 589 | bind = &p4_pebs_bind_map[idx]; | ||
| 590 | |||
| 591 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
| 592 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
| 510 | } | 593 | } |
| 511 | 594 | ||
| 512 | static void p4_pmu_enable_event(struct perf_event *event) | 595 | static void p4_pmu_enable_event(struct perf_event *event) |
| @@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
| 515 | int thread = p4_ht_config_thread(hwc->config); | 598 | int thread = p4_ht_config_thread(hwc->config); |
| 516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | 599 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); |
| 517 | unsigned int idx = p4_config_unpack_event(hwc->config); | 600 | unsigned int idx = p4_config_unpack_event(hwc->config); |
| 518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
| 519 | struct p4_event_bind *bind; | 601 | struct p4_event_bind *bind; |
| 520 | struct p4_cache_event_bind *bind_cache; | ||
| 521 | u64 escr_addr, cccr; | 602 | u64 escr_addr, cccr; |
| 522 | 603 | ||
| 523 | bind = &p4_event_bind_map[idx]; | 604 | bind = &p4_event_bind_map[idx]; |
| @@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
| 537 | cccr = p4_config_unpack_cccr(hwc->config); | 618 | cccr = p4_config_unpack_cccr(hwc->config); |
| 538 | 619 | ||
| 539 | /* | 620 | /* |
| 540 | 	 * it could be Cache event so that we need to | 621 | 	 * it could be a Cache event, so we need to write metrics |
| 541 | * set metrics into additional MSRs | 622 | * into additional MSRs |
| 542 | */ | 623 | */ |
| 543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | 624 | p4_pmu_enable_pebs(hwc->config); |
| 544 | if (idx_cache > P4_CACHE__NONE && | ||
| 545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
| 546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
| 547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
| 548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
| 549 | } | ||
| 550 | 625 | ||
| 551 | (void)checking_wrmsrl(escr_addr, escr_conf); | 626 | (void)checking_wrmsrl(escr_addr, escr_conf); |
| 552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 627 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
| @@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
| 829 | .max_period = (1ULL << 39) - 1, | 904 | .max_period = (1ULL << 39) - 1, |
| 830 | .hw_config = p4_hw_config, | 905 | .hw_config = p4_hw_config, |
| 831 | .schedule_events = p4_pmu_schedule_events, | 906 | .schedule_events = p4_pmu_schedule_events, |
| 907 | /* | ||
| 908 | 	 * This handles erratum N15 in Intel doc 249199-029: | ||
| 909 | 	 * the counter may not be updated correctly on write, | ||
| 910 | 	 * so we need a second write operation to do the trick | ||
| 911 | 	 * (the official workaround didn't work). | ||
| 912 | 	 * | ||
| 913 | 	 * The second-write idea is taken from the OProfile code. | ||
| 914 | */ | ||
| 915 | .perfctr_second_write = 1, | ||
| 832 | }; | 916 | }; |
| 833 | 917 | ||
| 834 | static __init int p4_pmu_init(void) | 918 | static __init int p4_pmu_init(void) |
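The .perfctr_second_write quirk set above is consumed on the generic x86 counter-write path, which lies outside this hunk. A minimal sketch of how such a flag could be honoured when reprogramming a counter -- the helper name is assumed for illustration and is not part of this patch:

	/* illustrative sketch only; the helper name is assumed, not from this patch */
	static void write_perfctr(unsigned long msr, u64 val)
	{
		wrmsrl(msr, val);		/* first write: may be lost due to erratum N15 */
		if (x86_pmu.perfctr_second_write)
			wrmsrl(msr, val);	/* re-issue the write so the counter really latches the value */
	}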
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386930b7..6e8752c1bd52 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | 18 | ||
| 19 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
| 20 | 20 | ||
| 21 | #include "dumpstack.h" | ||
| 22 | 21 | ||
| 23 | int panic_on_unrecovered_nmi; | 22 | int panic_on_unrecovered_nmi; |
| 24 | int panic_on_io_nmi; | 23 | int panic_on_io_nmi; |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be4fd44..000000000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null | |||
| @@ -1,56 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef DUMPSTACK_H | ||
| 7 | #define DUMPSTACK_H | ||
| 8 | |||
| 9 | #ifdef CONFIG_X86_32 | ||
| 10 | #define STACKSLOTS_PER_LINE 8 | ||
| 11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
| 12 | #else | ||
| 13 | #define STACKSLOTS_PER_LINE 4 | ||
| 14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
| 15 | #endif | ||
| 16 | |||
| 17 | #include <linux/uaccess.h> | ||
| 18 | |||
| 19 | extern void | ||
| 20 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 21 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
| 22 | |||
| 23 | extern void | ||
| 24 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 25 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
| 26 | |||
| 27 | extern unsigned int code_bytes; | ||
| 28 | |||
| 29 | /* The form of the top of the frame on the stack */ | ||
| 30 | struct stack_frame { | ||
| 31 | struct stack_frame *next_frame; | ||
| 32 | unsigned long return_address; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct stack_frame_ia32 { | ||
| 36 | u32 next_frame; | ||
| 37 | u32 return_address; | ||
| 38 | }; | ||
| 39 | |||
| 40 | static inline unsigned long rewind_frame_pointer(int n) | ||
| 41 | { | ||
| 42 | struct stack_frame *frame; | ||
| 43 | |||
| 44 | get_bp(frame); | ||
| 45 | |||
| 46 | #ifdef CONFIG_FRAME_POINTER | ||
| 47 | while (n--) { | ||
| 48 | if (probe_kernel_address(&frame->next_frame, frame)) | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | #endif | ||
| 52 | |||
| 53 | return (unsigned long)frame; | ||
| 54 | } | ||
| 55 | |||
| 56 | #endif /* DUMPSTACK_H */ | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a189d93..0f6376ffa2d9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
| @@ -16,8 +16,6 @@ | |||
| 16 | 16 | ||
| 17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
| 18 | 18 | ||
| 19 | #include "dumpstack.h" | ||
| 20 | |||
| 21 | 19 | ||
| 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
| 23 | unsigned long *stack, unsigned long bp, | 21 | unsigned long *stack, unsigned long bp, |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1f05f3..57a21f11c791 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | 16 | ||
| 17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
| 18 | 18 | ||
| 19 | #include "dumpstack.h" | ||
| 20 | 19 | ||
| 21 | #define N_EXCEPTION_STACKS_END \ | 20 | #define N_EXCEPTION_STACKS_END \ |
| 22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | 21 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b803d2fd..a474ec37c32f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
| @@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, | |||
| 208 | { | 208 | { |
| 209 | /* Len */ | 209 | /* Len */ |
| 210 | switch (x86_len) { | 210 | switch (x86_len) { |
| 211 | case X86_BREAKPOINT_LEN_X: | ||
| 212 | *gen_len = sizeof(long); | ||
| 213 | break; | ||
| 211 | case X86_BREAKPOINT_LEN_1: | 214 | case X86_BREAKPOINT_LEN_1: |
| 212 | *gen_len = HW_BREAKPOINT_LEN_1; | 215 | *gen_len = HW_BREAKPOINT_LEN_1; |
| 213 | break; | 216 | break; |
| @@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
| 251 | 254 | ||
| 252 | info->address = bp->attr.bp_addr; | 255 | info->address = bp->attr.bp_addr; |
| 253 | 256 | ||
| 257 | /* Type */ | ||
| 258 | switch (bp->attr.bp_type) { | ||
| 259 | case HW_BREAKPOINT_W: | ||
| 260 | info->type = X86_BREAKPOINT_WRITE; | ||
| 261 | break; | ||
| 262 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
| 263 | info->type = X86_BREAKPOINT_RW; | ||
| 264 | break; | ||
| 265 | case HW_BREAKPOINT_X: | ||
| 266 | info->type = X86_BREAKPOINT_EXECUTE; | ||
| 267 | /* | ||
| 268 | * x86 inst breakpoints need to have a specific undefined len. | ||
| 269 | 	 * But we still need to check that userspace is not trying to set up | ||
| 270 | 	 * an unsupported length (to get a range breakpoint, for example). | ||
| 271 | */ | ||
| 272 | if (bp->attr.bp_len == sizeof(long)) { | ||
| 273 | info->len = X86_BREAKPOINT_LEN_X; | ||
| 274 | return 0; | ||
| 275 | } | ||
| 276 | default: | ||
| 277 | return -EINVAL; | ||
| 278 | } | ||
| 279 | |||
| 254 | /* Len */ | 280 | /* Len */ |
| 255 | switch (bp->attr.bp_len) { | 281 | switch (bp->attr.bp_len) { |
| 256 | case HW_BREAKPOINT_LEN_1: | 282 | case HW_BREAKPOINT_LEN_1: |
| @@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
| 271 | return -EINVAL; | 297 | return -EINVAL; |
| 272 | } | 298 | } |
| 273 | 299 | ||
| 274 | /* Type */ | ||
| 275 | switch (bp->attr.bp_type) { | ||
| 276 | case HW_BREAKPOINT_W: | ||
| 277 | info->type = X86_BREAKPOINT_WRITE; | ||
| 278 | break; | ||
| 279 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
| 280 | info->type = X86_BREAKPOINT_RW; | ||
| 281 | break; | ||
| 282 | case HW_BREAKPOINT_X: | ||
| 283 | info->type = X86_BREAKPOINT_EXECUTE; | ||
| 284 | break; | ||
| 285 | default: | ||
| 286 | return -EINVAL; | ||
| 287 | } | ||
| 288 | |||
| 289 | return 0; | 300 | return 0; |
| 290 | } | 301 | } |
| 291 | /* | 302 | /* |
| @@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
| 305 | ret = -EINVAL; | 316 | ret = -EINVAL; |
| 306 | 317 | ||
| 307 | switch (info->len) { | 318 | switch (info->len) { |
| 319 | case X86_BREAKPOINT_LEN_X: | ||
| 320 | align = sizeof(long) -1; | ||
| 321 | break; | ||
| 308 | case X86_BREAKPOINT_LEN_1: | 322 | case X86_BREAKPOINT_LEN_1: |
| 309 | align = 0; | 323 | align = 0; |
| 310 | break; | 324 | break; |
| @@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
| 466 | 480 | ||
| 467 | perf_bp_event(bp, args->regs); | 481 | perf_bp_event(bp, args->regs); |
| 468 | 482 | ||
| 483 | /* | ||
| 484 | 	 * Set up the resume flag to avoid breakpoint recursion when | ||
| 485 | 	 * returning to the origin. | ||
| 486 | */ | ||
| 487 | if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) | ||
| 488 | args->regs->flags |= X86_EFLAGS_RF; | ||
| 489 | |||
| 469 | rcu_read_unlock(); | 490 | rcu_read_unlock(); |
| 470 | } | 491 | } |
| 471 | /* | 492 | /* |
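For the execute-breakpoint case handled above, bp_len == sizeof(long) is the mandatory "undefined" length marker. A hedged usage sketch of how a caller might fill in perf_event_attr for such a breakpoint -- apart from the attribute names and the sizeof(long) convention taken from this patch, the helper is illustrative only:

	#include <linux/perf_event.h>
	#include <linux/hw_breakpoint.h>

	/* illustrative only: build attributes for an instruction breakpoint at 'addr' */
	static struct perf_event_attr exec_bp_attr(unsigned long addr)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);		/* sets PERF_TYPE_BREAKPOINT and sane defaults */
		attr.bp_type	= HW_BREAKPOINT_X;
		attr.bp_addr	= addr;
		attr.bp_len	= sizeof(long);		/* required marker length for execute breakpoints */
		return attr;
	}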
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 675879b65ce6..1bfb6cf4dd55 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to) | |||
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | /* | 128 | /* |
| 129 | * Check for the REX prefix which can only exist on X86_64 | 129 | * Skip the prefixes of the instruction. |
| 130 | * X86_32 always returns 0 | ||
| 131 | */ | 130 | */ |
| 132 | static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) | 131 | static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) |
| 133 | { | 132 | { |
| 133 | insn_attr_t attr; | ||
| 134 | |||
| 135 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
| 136 | while (inat_is_legacy_prefix(attr)) { | ||
| 137 | insn++; | ||
| 138 | attr = inat_get_opcode_attribute((insn_byte_t)*insn); | ||
| 139 | } | ||
| 134 | #ifdef CONFIG_X86_64 | 140 | #ifdef CONFIG_X86_64 |
| 135 | if ((*insn & 0xf0) == 0x40) | 141 | if (inat_is_rex_prefix(attr)) |
| 136 | return 1; | 142 | insn++; |
| 137 | #endif | 143 | #endif |
| 138 | return 0; | 144 | return insn; |
| 139 | } | 145 | } |
| 140 | 146 | ||
| 141 | /* | 147 | /* |
| @@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr) | |||
| 272 | */ | 278 | */ |
| 273 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | 279 | static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) |
| 274 | { | 280 | { |
| 281 | /* Skip prefixes */ | ||
| 282 | insn = skip_prefixes(insn); | ||
| 283 | |||
| 275 | switch (*insn) { | 284 | switch (*insn) { |
| 276 | case 0xfa: /* cli */ | 285 | case 0xfa: /* cli */ |
| 277 | case 0xfb: /* sti */ | 286 | case 0xfb: /* sti */ |
| @@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
| 280 | return 1; | 289 | return 1; |
| 281 | } | 290 | } |
| 282 | 291 | ||
| 283 | /* | ||
| 284 | * on X86_64, 0x40-0x4f are REX prefixes so we need to look | ||
| 285 | * at the next byte instead.. but of course not recurse infinitely | ||
| 286 | */ | ||
| 287 | if (is_REX_prefix(insn)) | ||
| 288 | return is_IF_modifier(++insn); | ||
| 289 | |||
| 290 | return 0; | 292 | return 0; |
| 291 | } | 293 | } |
| 292 | 294 | ||
| @@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
| 803 | unsigned long orig_ip = (unsigned long)p->addr; | 805 | unsigned long orig_ip = (unsigned long)p->addr; |
| 804 | kprobe_opcode_t *insn = p->ainsn.insn; | 806 | kprobe_opcode_t *insn = p->ainsn.insn; |
| 805 | 807 | ||
| 806 | /*skip the REX prefix*/ | 808 | /* Skip prefixes */ |
| 807 | if (is_REX_prefix(insn)) | 809 | insn = skip_prefixes(insn); |
| 808 | insn++; | ||
| 809 | 810 | ||
| 810 | regs->flags &= ~X86_EFLAGS_TF; | 811 | regs->flags &= ~X86_EFLAGS_TF; |
| 811 | switch (*insn) { | 812 | switch (*insn) { |
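skip_prefixes() above walks past any legacy prefixes (and, on x86_64, a single REX prefix) before the opcode byte is examined. A small hedged sketch of the effect on the interrupt-flag check -- the wrapper function is made up for illustration:

	/* illustrative only: a cli opcode hidden behind a segment-override prefix */
	static int example_prefixed_cli_is_detected(void)
	{
		kprobe_opcode_t insn[] = { 0x3e, 0xfa };	/* "ds; cli" */

		/*
		 * is_IF_modifier() now calls skip_prefixes(), which steps over
		 * the 0x3e legacy prefix, so the 0xfa (cli) opcode is matched.
		 */
		return is_IF_modifier(insn);
	}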
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af47..96586c3cbbbf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -57,6 +57,8 @@ | |||
| 57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
| 58 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
| 59 | 59 | ||
| 60 | #include <trace/events/power.h> | ||
| 61 | |||
| 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| 61 | 63 | ||
| 62 | /* | 64 | /* |
| @@ -111,6 +113,8 @@ void cpu_idle(void) | |||
| 111 | stop_critical_timings(); | 113 | stop_critical_timings(); |
| 112 | pm_idle(); | 114 | pm_idle(); |
| 113 | start_critical_timings(); | 115 | start_critical_timings(); |
| 116 | |||
| 117 | trace_power_end(smp_processor_id()); | ||
| 114 | } | 118 | } |
| 115 | tick_nohz_restart_sched_tick(); | 119 | tick_nohz_restart_sched_tick(); |
| 116 | preempt_enable_no_resched(); | 120 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a99f1f..3d9ea531ddd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -51,6 +51,8 @@ | |||
| 51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
| 52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
| 53 | 53 | ||
| 54 | #include <trace/events/power.h> | ||
| 55 | |||
| 54 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
| 55 | 57 | ||
| 56 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
| @@ -138,6 +140,9 @@ void cpu_idle(void) | |||
| 138 | stop_critical_timings(); | 140 | stop_critical_timings(); |
| 139 | pm_idle(); | 141 | pm_idle(); |
| 140 | start_critical_timings(); | 142 | start_critical_timings(); |
| 143 | |||
| 144 | trace_power_end(smp_processor_id()); | ||
| 145 | |||
| 141 | /* In many cases the interrupt that ended idle | 146 | /* In many cases the interrupt that ended idle |
| 142 | has already called exit_idle. But some idle | 147 | has already called exit_idle. But some idle |
| 143 | loops can be woken up without interrupt. */ | 148 | loops can be woken up without interrupt. */ |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefbb3f6c..b53c525368a7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
| @@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name) | |||
| 23 | return 0; | 23 | return 0; |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 26 | static void |
| 27 | __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) | ||
| 27 | { | 28 | { |
| 28 | struct stack_trace *trace = data; | 29 | struct stack_trace *trace = data; |
| 30 | #ifdef CONFIG_FRAME_POINTER | ||
| 29 | if (!reliable) | 31 | if (!reliable) |
| 30 | return; | 32 | return; |
| 33 | #endif | ||
| 34 | if (nosched && in_sched_functions(addr)) | ||
| 35 | return; | ||
| 31 | if (trace->skip > 0) { | 36 | if (trace->skip > 0) { |
| 32 | trace->skip--; | 37 | trace->skip--; |
| 33 | return; | 38 | return; |
| @@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) | |||
| 36 | trace->entries[trace->nr_entries++] = addr; | 41 | trace->entries[trace->nr_entries++] = addr; |
| 37 | } | 42 | } |
| 38 | 43 | ||
| 44 | static void save_stack_address(void *data, unsigned long addr, int reliable) | ||
| 45 | { | ||
| 46 | return __save_stack_address(data, addr, reliable, false); | ||
| 47 | } | ||
| 48 | |||
| 39 | static void | 49 | static void |
| 40 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 50 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) |
| 41 | { | 51 | { |
| 42 | struct stack_trace *trace = (struct stack_trace *)data; | 52 | return __save_stack_address(data, addr, reliable, true); |
| 43 | if (!reliable) | ||
| 44 | return; | ||
| 45 | if (in_sched_functions(addr)) | ||
| 46 | return; | ||
| 47 | if (trace->skip > 0) { | ||
| 48 | trace->skip--; | ||
| 49 | return; | ||
| 50 | } | ||
| 51 | if (trace->nr_entries < trace->max_entries) | ||
| 52 | trace->entries[trace->nr_entries++] = addr; | ||
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | static const struct stacktrace_ops save_stack_ops = { | 55 | static const struct stacktrace_ops save_stack_ops = { |
| @@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | |||
| 96 | 96 | ||
| 97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | 97 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ |
| 98 | 98 | ||
| 99 | struct stack_frame { | 99 | struct stack_frame_user { |
| 100 | const void __user *next_fp; | 100 | const void __user *next_fp; |
| 101 | unsigned long ret_addr; | 101 | unsigned long ret_addr; |
| 102 | }; | 102 | }; |
| 103 | 103 | ||
| 104 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 104 | static int |
| 105 | copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) | ||
| 105 | { | 106 | { |
| 106 | int ret; | 107 | int ret; |
| 107 | 108 | ||
| @@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) | |||
| 126 | trace->entries[trace->nr_entries++] = regs->ip; | 127 | trace->entries[trace->nr_entries++] = regs->ip; |
| 127 | 128 | ||
| 128 | while (trace->nr_entries < trace->max_entries) { | 129 | while (trace->nr_entries < trace->max_entries) { |
| 129 | struct stack_frame frame; | 130 | struct stack_frame_user frame; |
| 130 | 131 | ||
| 131 | frame.next_fp = NULL; | 132 | frame.next_fp = NULL; |
| 132 | frame.ret_addr = 0; | 133 | frame.ret_addr = 0; |
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/xtensa/include/asm/local64.h | |||
| @@ -0,0 +1 @@ | |||
| #include <asm-generic/local64.h> | |||
