aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/local64.h1
-rw-r--r--arch/arm/include/asm/local64.h1
-rw-r--r--arch/arm/kernel/perf_event.c18
-rw-r--r--arch/avr32/include/asm/local64.h1
-rw-r--r--arch/blackfin/include/asm/local64.h1
-rw-r--r--arch/cris/include/asm/local64.h1
-rw-r--r--arch/frv/include/asm/local64.h1
-rw-r--r--arch/frv/kernel/local64.h1
-rw-r--r--arch/h8300/include/asm/local64.h1
-rw-r--r--arch/ia64/include/asm/local64.h1
-rw-r--r--arch/m32r/include/asm/local64.h1
-rw-r--r--arch/m68k/include/asm/local64.h1
-rw-r--r--arch/microblaze/include/asm/local64.h1
-rw-r--r--arch/mips/include/asm/local64.h1
-rw-r--r--arch/mn10300/include/asm/local64.h1
-rw-r--r--arch/parisc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/perf_event.h12
-rw-r--r--arch/powerpc/kernel/misc.S26
-rw-r--r--arch/powerpc/kernel/perf_event.c41
-rw-r--r--arch/s390/include/asm/local64.h1
-rw-r--r--arch/score/include/asm/local64.h1
-rw-r--r--arch/sh/include/asm/local64.h1
-rw-r--r--arch/sh/kernel/perf_event.c6
-rw-r--r--arch/sparc/include/asm/local64.h1
-rw-r--r--arch/sparc/include/asm/perf_event.h8
-rw-r--r--arch/sparc/kernel/helpers.S6
-rw-r--r--arch/sparc/kernel/perf_event.c25
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h2
-rw-r--r--arch/x86/include/asm/local64.h1
-rw-r--r--arch/x86/include/asm/perf_event.h18
-rw-r--r--arch/x86/include/asm/perf_event_p4.h99
-rw-r--r--arch/x86/include/asm/stacktrace.h49
-rw-r--r--arch/x86/kernel/cpu/perf_event.c62
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c156
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/dumpstack.h56
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/hw_breakpoint.c51
-rw-r--r--arch/x86/kernel/kprobes.c33
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/stacktrace.c31
-rw-r--r--arch/xtensa/include/asm/local64.h1
45 files changed, 432 insertions, 302 deletions
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index de12536d687f..417c392ddf1c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
164 struct hw_perf_event *hwc, 164 struct hw_perf_event *hwc,
165 int idx) 165 int idx)
166{ 166{
167 s64 left = atomic64_read(&hwc->period_left); 167 s64 left = local64_read(&hwc->period_left);
168 s64 period = hwc->sample_period; 168 s64 period = hwc->sample_period;
169 int ret = 0; 169 int ret = 0;
170 170
171 if (unlikely(left <= -period)) { 171 if (unlikely(left <= -period)) {
172 left = period; 172 left = period;
173 atomic64_set(&hwc->period_left, left); 173 local64_set(&hwc->period_left, left);
174 hwc->last_period = period; 174 hwc->last_period = period;
175 ret = 1; 175 ret = 1;
176 } 176 }
177 177
178 if (unlikely(left <= 0)) { 178 if (unlikely(left <= 0)) {
179 left += period; 179 left += period;
180 atomic64_set(&hwc->period_left, left); 180 local64_set(&hwc->period_left, left);
181 hwc->last_period = period; 181 hwc->last_period = period;
182 ret = 1; 182 ret = 1;
183 } 183 }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
185 if (left > (s64)armpmu->max_period) 185 if (left > (s64)armpmu->max_period)
186 left = armpmu->max_period; 186 left = armpmu->max_period;
187 187
188 atomic64_set(&hwc->prev_count, (u64)-left); 188 local64_set(&hwc->prev_count, (u64)-left);
189 189
190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); 190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191 191
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
204 u64 delta; 204 u64 delta;
205 205
206again: 206again:
207 prev_raw_count = atomic64_read(&hwc->prev_count); 207 prev_raw_count = local64_read(&hwc->prev_count);
208 new_raw_count = armpmu->read_counter(idx); 208 new_raw_count = armpmu->read_counter(idx);
209 209
210 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 210 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211 new_raw_count) != prev_raw_count) 211 new_raw_count) != prev_raw_count)
212 goto again; 212 goto again;
213 213
214 delta = (new_raw_count << shift) - (prev_raw_count << shift); 214 delta = (new_raw_count << shift) - (prev_raw_count << shift);
215 delta >>= shift; 215 delta >>= shift;
216 216
217 atomic64_add(delta, &event->count); 217 local64_add(delta, &event->count);
218 atomic64_sub(delta, &hwc->period_left); 218 local64_sub(delta, &hwc->period_left);
219 219
220 return new_raw_count; 220 return new_raw_count;
221} 221}
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
478 if (!hwc->sample_period) { 478 if (!hwc->sample_period) {
479 hwc->sample_period = armpmu->max_period; 479 hwc->sample_period = armpmu->max_period;
480 hwc->last_period = hwc->sample_period; 480 hwc->last_period = hwc->sample_period;
481 atomic64_set(&hwc->period_left, hwc->sample_period); 481 local64_set(&hwc->period_left, hwc->sample_period);
482 } 482 }
483 483
484 err = 0; 484 err = 0;
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b126..5c16b891d501 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
21#ifdef CONFIG_FSL_EMB_PERF_EVENT 21#ifdef CONFIG_FSL_EMB_PERF_EVENT
22#include <asm/perf_event_fsl_emb.h> 22#include <asm/perf_event_fsl_emb.h>
23#endif 23#endif
24
25#ifdef CONFIG_PERF_EVENTS
26#include <asm/ptrace.h>
27#include <asm/reg.h>
28
29#define perf_arch_fetch_caller_regs(regs, __ip) \
30 do { \
31 (regs)->nip = __ip; \
32 (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
33 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
34 } while (0)
35#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a556..2d29752cbe16 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
127_GLOBAL(__restore_cpu_power7) 127_GLOBAL(__restore_cpu_power7)
128 /* place holder */ 128 /* place holder */
129 blr 129 blr
130
131/*
132 * Get a minimal set of registers for our caller's nth caller.
133 * r3 = regs pointer, r5 = n.
134 *
135 * We only get R1 (stack pointer), NIP (next instruction pointer)
136 * and LR (link register). These are all we can get in the
137 * general case without doing complicated stack unwinding, but
138 * fortunately they are enough to do a stack backtrace, which
139 * is all we need them for.
140 */
141_GLOBAL(perf_arch_fetch_caller_regs)
142 mr r6,r1
143 cmpwi r5,0
144 mflr r4
145 ble 2f
146 mtctr r5
1471: PPC_LL r6,0(r6)
148 bdnz 1b
149 PPC_LL r4,PPC_LR_STKOFF(r6)
1502: PPC_LL r7,0(r6)
151 PPC_LL r7,PPC_LR_STKOFF(r7)
152 PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
153 PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
154 PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
155 blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe51258..d301a30445e0 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
410 * Therefore we treat them like NMIs. 410 * Therefore we treat them like NMIs.
411 */ 411 */
412 do { 412 do {
413 prev = atomic64_read(&event->hw.prev_count); 413 prev = local64_read(&event->hw.prev_count);
414 barrier(); 414 barrier();
415 val = read_pmc(event->hw.idx); 415 val = read_pmc(event->hw.idx);
416 } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 416 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
417 417
418 /* The counters are only 32 bits wide */ 418 /* The counters are only 32 bits wide */
419 delta = (val - prev) & 0xfffffffful; 419 delta = (val - prev) & 0xfffffffful;
420 atomic64_add(delta, &event->count); 420 local64_add(delta, &event->count);
421 atomic64_sub(delta, &event->hw.period_left); 421 local64_sub(delta, &event->hw.period_left);
422} 422}
423 423
424/* 424/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
444 if (!event->hw.idx) 444 if (!event->hw.idx)
445 continue; 445 continue;
446 val = (event->hw.idx == 5) ? pmc5 : pmc6; 446 val = (event->hw.idx == 5) ? pmc5 : pmc6;
447 prev = atomic64_read(&event->hw.prev_count); 447 prev = local64_read(&event->hw.prev_count);
448 event->hw.idx = 0; 448 event->hw.idx = 0;
449 delta = (val - prev) & 0xfffffffful; 449 delta = (val - prev) & 0xfffffffful;
450 atomic64_add(delta, &event->count); 450 local64_add(delta, &event->count);
451 } 451 }
452} 452}
453 453
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
462 event = cpuhw->limited_counter[i]; 462 event = cpuhw->limited_counter[i];
463 event->hw.idx = cpuhw->limited_hwidx[i]; 463 event->hw.idx = cpuhw->limited_hwidx[i];
464 val = (event->hw.idx == 5) ? pmc5 : pmc6; 464 val = (event->hw.idx == 5) ? pmc5 : pmc6;
465 atomic64_set(&event->hw.prev_count, val); 465 local64_set(&event->hw.prev_count, val);
466 perf_event_update_userpage(event); 466 perf_event_update_userpage(event);
467 } 467 }
468} 468}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
666 } 666 }
667 val = 0; 667 val = 0;
668 if (event->hw.sample_period) { 668 if (event->hw.sample_period) {
669 left = atomic64_read(&event->hw.period_left); 669 left = local64_read(&event->hw.period_left);
670 if (left < 0x80000000L) 670 if (left < 0x80000000L)
671 val = 0x80000000L - left; 671 val = 0x80000000L - left;
672 } 672 }
673 atomic64_set(&event->hw.prev_count, val); 673 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 674 event->hw.idx = idx;
675 write_pmc(idx, val); 675 write_pmc(idx, val);
676 perf_event_update_userpage(event); 676 perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
754 * skip the schedulability test here, it will be peformed 754 * skip the schedulability test here, it will be peformed
755 * at commit time(->commit_txn) as a whole 755 * at commit time(->commit_txn) as a whole
756 */ 756 */
757 if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) 757 if (cpuhw->group_flag & PERF_EVENT_TXN)
758 goto nocheck; 758 goto nocheck;
759 759
760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
845 if (left < 0x80000000L) 845 if (left < 0x80000000L)
846 val = 0x80000000L - left; 846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val); 847 write_pmc(event->hw.idx, val);
848 atomic64_set(&event->hw.prev_count, val); 848 local64_set(&event->hw.prev_count, val);
849 atomic64_set(&event->hw.period_left, left); 849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 850 perf_event_update_userpage(event);
851 perf_enable(); 851 perf_enable();
852 local_irq_restore(flags); 852 local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
861{ 861{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 863
864 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 864 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 865 cpuhw->n_txn_start = cpuhw->n_events;
866} 866}
867 867
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
874{ 874{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 876
877 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 877 cpuhw->group_flag &= ~PERF_EVENT_TXN;
878} 878}
879 879
880/* 880/*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
900 for (i = cpuhw->n_txn_start; i < n; ++i) 900 for (i = cpuhw->n_txn_start; i < n; ++i)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 901 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 902
903 cpuhw->group_flag &= ~PERF_EVENT_TXN;
903 return 0; 904 return 0;
904} 905}
905 906
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1111 event->hw.config = events[n]; 1112 event->hw.config = events[n];
1112 event->hw.event_base = cflags[n]; 1113 event->hw.event_base = cflags[n];
1113 event->hw.last_period = event->hw.sample_period; 1114 event->hw.last_period = event->hw.sample_period;
1114 atomic64_set(&event->hw.period_left, event->hw.last_period); 1115 local64_set(&event->hw.period_left, event->hw.last_period);
1115 1116
1116 /* 1117 /*
1117 * See if we need to reserve the PMU. 1118 * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 int record = 0; 1150 int record = 0;
1150 1151
1151 /* we don't have to worry about interrupts here */ 1152 /* we don't have to worry about interrupts here */
1152 prev = atomic64_read(&event->hw.prev_count); 1153 prev = local64_read(&event->hw.prev_count);
1153 delta = (val - prev) & 0xfffffffful; 1154 delta = (val - prev) & 0xfffffffful;
1154 atomic64_add(delta, &event->count); 1155 local64_add(delta, &event->count);
1155 1156
1156 /* 1157 /*
1157 * See if the total period for this event has expired, 1158 * See if the total period for this event has expired,
1158 * and update for the next period. 1159 * and update for the next period.
1159 */ 1160 */
1160 val = 0; 1161 val = 0;
1161 left = atomic64_read(&event->hw.period_left) - delta; 1162 left = local64_read(&event->hw.period_left) - delta;
1162 if (period) { 1163 if (period) {
1163 if (left <= 0) { 1164 if (left <= 0) {
1164 left += period; 1165 left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1196 } 1197 }
1197 1198
1198 write_pmc(event->hw.idx, val); 1199 write_pmc(event->hw.idx, val);
1199 atomic64_set(&event->hw.prev_count, val); 1200 local64_set(&event->hw.prev_count, val);
1200 atomic64_set(&event->hw.period_left, left); 1201 local64_set(&event->hw.period_left, left);
1201 perf_event_update_userpage(event); 1202 perf_event_update_userpage(event);
1202} 1203}
1203 1204
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5d..7a3dc3567258 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
185 * this is the simplest approach for maintaining consistency. 185 * this is the simplest approach for maintaining consistency.
186 */ 186 */
187again: 187again:
188 prev_raw_count = atomic64_read(&hwc->prev_count); 188 prev_raw_count = local64_read(&hwc->prev_count);
189 new_raw_count = sh_pmu->read(idx); 189 new_raw_count = sh_pmu->read(idx);
190 190
191 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 191 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
192 new_raw_count) != prev_raw_count) 192 new_raw_count) != prev_raw_count)
193 goto again; 193 goto again;
194 194
@@ -203,7 +203,7 @@ again:
203 delta = (new_raw_count << shift) - (prev_raw_count << shift); 203 delta = (new_raw_count << shift) - (prev_raw_count << shift);
204 delta >>= shift; 204 delta >>= shift;
205 205
206 atomic64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce8..74c4e0cd889c 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
6#define PERF_EVENT_INDEX_OFFSET 0 6#define PERF_EVENT_INDEX_OFFSET 0
7 7
8#ifdef CONFIG_PERF_EVENTS 8#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h>
10
9extern void init_hw_perf_events(void); 11extern void init_hw_perf_events(void);
12
13extern void
14__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
15
16#define perf_arch_fetch_caller_regs(pt_regs, ip) \
17 __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
10#else 18#else
11static inline void init_hw_perf_events(void) { } 19static inline void init_hw_perf_events(void) { }
12#endif 20#endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e829..682fee06a16b 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
47 .size stack_trace_flush,.-stack_trace_flush 47 .size stack_trace_flush,.-stack_trace_flush
48 48
49#ifdef CONFIG_PERF_EVENTS 49#ifdef CONFIG_PERF_EVENTS
50 .globl perf_arch_fetch_caller_regs 50 .globl __perf_arch_fetch_caller_regs
51 .type perf_arch_fetch_caller_regs,#function 51 .type __perf_arch_fetch_caller_regs,#function
52perf_arch_fetch_caller_regs: 52__perf_arch_fetch_caller_regs:
53 /* We always read the %pstate into %o5 since we will use 53 /* We always read the %pstate into %o5 since we will use
54 * that to construct a fake %tstate to store into the regs. 54 * that to construct a fake %tstate to store into the regs.
55 */ 55 */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 44faabc3c02c..357ced3c33ff 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
572 s64 delta; 572 s64 delta;
573 573
574again: 574again:
575 prev_raw_count = atomic64_read(&hwc->prev_count); 575 prev_raw_count = local64_read(&hwc->prev_count);
576 new_raw_count = read_pmc(idx); 576 new_raw_count = read_pmc(idx);
577 577
578 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 578 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
579 new_raw_count) != prev_raw_count) 579 new_raw_count) != prev_raw_count)
580 goto again; 580 goto again;
581 581
582 delta = (new_raw_count << shift) - (prev_raw_count << shift); 582 delta = (new_raw_count << shift) - (prev_raw_count << shift);
583 delta >>= shift; 583 delta >>= shift;
584 584
585 atomic64_add(delta, &event->count); 585 local64_add(delta, &event->count);
586 atomic64_sub(delta, &hwc->period_left); 586 local64_sub(delta, &hwc->period_left);
587 587
588 return new_raw_count; 588 return new_raw_count;
589} 589}
@@ -591,27 +591,27 @@ again:
591static int sparc_perf_event_set_period(struct perf_event *event, 591static int sparc_perf_event_set_period(struct perf_event *event,
592 struct hw_perf_event *hwc, int idx) 592 struct hw_perf_event *hwc, int idx)
593{ 593{
594 s64 left = atomic64_read(&hwc->period_left); 594 s64 left = local64_read(&hwc->period_left);
595 s64 period = hwc->sample_period; 595 s64 period = hwc->sample_period;
596 int ret = 0; 596 int ret = 0;
597 597
598 if (unlikely(left <= -period)) { 598 if (unlikely(left <= -period)) {
599 left = period; 599 left = period;
600 atomic64_set(&hwc->period_left, left); 600 local64_set(&hwc->period_left, left);
601 hwc->last_period = period; 601 hwc->last_period = period;
602 ret = 1; 602 ret = 1;
603 } 603 }
604 604
605 if (unlikely(left <= 0)) { 605 if (unlikely(left <= 0)) {
606 left += period; 606 left += period;
607 atomic64_set(&hwc->period_left, left); 607 local64_set(&hwc->period_left, left);
608 hwc->last_period = period; 608 hwc->last_period = period;
609 ret = 1; 609 ret = 1;
610 } 610 }
611 if (left > MAX_PERIOD) 611 if (left > MAX_PERIOD)
612 left = MAX_PERIOD; 612 left = MAX_PERIOD;
613 613
614 atomic64_set(&hwc->prev_count, (u64)-left); 614 local64_set(&hwc->prev_count, (u64)-left);
615 615
616 write_pmc(idx, (u64)(-left) & 0xffffffff); 616 write_pmc(idx, (u64)(-left) & 0xffffffff);
617 617
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
1006 * skip the schedulability test here, it will be peformed 1006 * skip the schedulability test here, it will be peformed
1007 * at commit time(->commit_txn) as a whole 1007 * at commit time(->commit_txn) as a whole
1008 */ 1008 */
1009 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1009 if (cpuc->group_flag & PERF_EVENT_TXN)
1010 goto nocheck; 1010 goto nocheck;
1011 1011
1012 if (check_excludes(cpuc->event, n0, 1)) 1012 if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1088 if (!hwc->sample_period) { 1088 if (!hwc->sample_period) {
1089 hwc->sample_period = MAX_PERIOD; 1089 hwc->sample_period = MAX_PERIOD;
1090 hwc->last_period = hwc->sample_period; 1090 hwc->last_period = hwc->sample_period;
1091 atomic64_set(&hwc->period_left, hwc->sample_period); 1091 local64_set(&hwc->period_left, hwc->sample_period);
1092 } 1092 }
1093 1093
1094 return 0; 1094 return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1103{ 1103{
1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1105 1105
1106 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 1106 cpuhw->group_flag |= PERF_EVENT_TXN;
1107} 1107}
1108 1108
1109/* 1109/*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1115{ 1115{
1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1117 1117
1118 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 1118 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1119} 1119}
1120 1120
1121/* 1121/*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n)) 1138 if (sparc_check_constraints(cpuc->event, cpuc->events, n))
1139 return -EAGAIN; 1139 return -EAGAIN;
1140 1140
1141 cpuc->group_flag &= ~PERF_EVENT_TXN;
1141 return 0; 1142 return 0;
1142} 1143}
1143 1144
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..528a11e8d3e3 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
20#include <linux/list.h> 20#include <linux/list.h>
21 21
22/* Available HW breakpoint length encodings */ 22/* Available HW breakpoint length encodings */
23#define X86_BREAKPOINT_LEN_X 0x00
23#define X86_BREAKPOINT_LEN_1 0x40 24#define X86_BREAKPOINT_LEN_1 0x40
24#define X86_BREAKPOINT_LEN_2 0x44 25#define X86_BREAKPOINT_LEN_2 0x44
25#define X86_BREAKPOINT_LEN_4 0x4c 26#define X86_BREAKPOINT_LEN_4 0x4c
26#define X86_BREAKPOINT_LEN_EXECUTE 0x40
27 27
28#ifdef CONFIG_X86_64 28#ifdef CONFIG_X86_64
29#define X86_BREAKPOINT_LEN_8 0x48 29#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
68 68
69union cpuid10_edx { 69union cpuid10_edx {
70 struct { 70 struct {
71 unsigned int num_counters_fixed:4; 71 unsigned int num_counters_fixed:5;
72 unsigned int reserved:28; 72 unsigned int bit_width_fixed:8;
73 unsigned int reserved:19;
73 } split; 74 } split;
74 unsigned int full; 75 unsigned int full;
75}; 76};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
140extern unsigned long perf_misc_flags(struct pt_regs *regs); 141extern unsigned long perf_misc_flags(struct pt_regs *regs);
141#define perf_misc_flags(regs) perf_misc_flags(regs) 142#define perf_misc_flags(regs) perf_misc_flags(regs)
142 143
144#include <asm/stacktrace.h>
145
146/*
147 * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
148 * and the comment with PERF_EFLAGS_EXACT.
149 */
150#define perf_arch_fetch_caller_regs(regs, __ip) { \
151 (regs)->ip = (__ip); \
152 (regs)->bp = caller_frame_pointer(); \
153 (regs)->cs = __KERNEL_CS; \
154 regs->flags = 0; \
155}
156
143#else 157#else
144static inline void init_hw_perf_events(void) { } 158static inline void init_hw_perf_events(void) { }
145static inline void perf_events_lapic_init(void) { } 159static inline void perf_events_lapic_init(void) { }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ 19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) 20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18) 21#define ARCH_P4_MAX_CCCR (18)
22#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
23 22
24#define P4_ESCR_EVENT_MASK 0x7e000000U 23#define P4_ESCR_EVENT_MASK 0x7e000000U
25#define P4_ESCR_EVENT_SHIFT 25 24#define P4_ESCR_EVENT_SHIFT 25
@@ -71,10 +70,6 @@
71#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
72#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
73 72
74/* Custom bits in reerved CCCR area */
75#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
76
77
78/* Non HT mask */ 73/* Non HT mask */
79#define P4_CCCR_MASK \ 74#define P4_CCCR_MASK \
80 (P4_CCCR_OVF | \ 75 (P4_CCCR_OVF | \
@@ -106,8 +101,7 @@
106 * ESCR and CCCR but rather an only packed value should 101 * ESCR and CCCR but rather an only packed value should
107 * be unpacked and written to a proper addresses 102 * be unpacked and written to a proper addresses
108 * 103 *
109 * the base idea is to pack as much info as 104 * the base idea is to pack as much info as possible
110 * possible
111 */ 105 */
112#define p4_config_pack_escr(v) (((u64)(v)) << 32) 106#define p4_config_pack_escr(v) (((u64)(v)) << 32)
113#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) 107#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
130 t; \ 124 t; \
131 }) 125 })
132 126
133#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
134
135#define P4_CONFIG_HT_SHIFT 63 127#define P4_CONFIG_HT_SHIFT 63
136#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
137 129
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
214 return escr; 206 return escr;
215} 207}
216 208
209/*
210 * This are the events which should be used in "Event Select"
211 * field of ESCR register, they are like unique keys which allow
212 * the kernel to determinate which CCCR and COUNTER should be
213 * used to track an event
214 */
217enum P4_EVENTS { 215enum P4_EVENTS {
218 P4_EVENT_TC_DELIVER_MODE, 216 P4_EVENT_TC_DELIVER_MODE,
219 P4_EVENT_BPU_FETCH_REQUEST, 217 P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
561 * a caller should use P4_ESCR_EMASK_NAME helper to 559 * a caller should use P4_ESCR_EMASK_NAME helper to
562 * pick the EventMask needed, for example 560 * pick the EventMask needed, for example
563 * 561 *
564 * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) 562 * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
565 */ 563 */
566enum P4_ESCR_EMASKS { 564enum P4_ESCR_EMASKS {
567 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), 565 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
753 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), 751 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
754}; 752};
755 753
756/* P4 PEBS: stale for a while */ 754/*
757#define P4_PEBS_METRIC_MASK 0x00001fffU 755 * P4 PEBS specifics (Replay Event only)
758#define P4_PEBS_UOB_TAG 0x01000000U 756 *
759#define P4_PEBS_ENABLE 0x02000000U 757 * Format (bits):
760 758 * 0-6: metric from P4_PEBS_METRIC enum
761/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ 759 * 7 : reserved
762#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 760 * 8 : reserved
763#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 761 * 9-11 : reserved
764#define P4_PEBS__dtlb_load_miss_retired 0x3000004 762 *
765#define P4_PEBS__dtlb_store_miss_retired 0x3000004 763 * Note we have UOP and PEBS bits reserved for now
766#define P4_PEBS__dtlb_all_miss_retired 0x3000004 764 * just in case if we will need them once
767#define P4_PEBS__tagged_mispred_branch 0x3018000 765 */
768#define P4_PEBS__mob_load_replay_retired 0x3000200 766#define P4_PEBS_CONFIG_ENABLE (1 << 7)
769#define P4_PEBS__split_load_retired 0x3000400 767#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
770#define P4_PEBS__split_store_retired 0x3000400 768#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
771 769#define P4_PEBS_CONFIG_MASK 0xff
772#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 770
773#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 771/*
774#define P4_VERT__dtlb_load_miss_retired 0x0000001 772 * mem: Only counters MSR_IQ_COUNTER4 (16) and
775#define P4_VERT__dtlb_store_miss_retired 0x0000002 773 * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
776#define P4_VERT__dtlb_all_miss_retired 0x0000003 774 */
777#define P4_VERT__tagged_mispred_branch 0x0000010 775#define P4_PEBS_ENABLE 0x02000000U
778#define P4_VERT__mob_load_replay_retired 0x0000001 776#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
779#define P4_VERT__split_load_retired 0x0000001 777
780#define P4_VERT__split_store_retired 0x0000002 778#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
781 779#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
782enum P4_CACHE_EVENTS { 780
783 P4_CACHE__NONE, 781#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask))
784 782
785 P4_CACHE__1stl_cache_load_miss_retired, 783enum P4_PEBS_METRIC {
786 P4_CACHE__2ndl_cache_load_miss_retired, 784 P4_PEBS_METRIC__none,
787 P4_CACHE__dtlb_load_miss_retired, 785
788 P4_CACHE__dtlb_store_miss_retired, 786 P4_PEBS_METRIC__1stl_cache_load_miss_retired,
789 P4_CACHE__itlb_reference_hit, 787 P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
790 P4_CACHE__itlb_reference_miss, 788 P4_PEBS_METRIC__dtlb_load_miss_retired,
791 789 P4_PEBS_METRIC__dtlb_store_miss_retired,
792 P4_CACHE__MAX 790 P4_PEBS_METRIC__dtlb_all_miss_retired,
791 P4_PEBS_METRIC__tagged_mispred_branch,
792 P4_PEBS_METRIC__mob_load_replay_retired,
793 P4_PEBS_METRIC__split_load_retired,
794 P4_PEBS_METRIC__split_store_retired,
795
796 P4_PEBS_METRIC__max
793}; 797};
794 798
795#endif /* PERF_EVENT_P4_H */ 799#endif /* PERF_EVENT_P4_H */
800
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
1#ifndef _ASM_X86_STACKTRACE_H 6#ifndef _ASM_X86_STACKTRACE_H
2#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
3 8
9#include <linux/uaccess.h>
10
4extern int kstack_depth_to_print; 11extern int kstack_depth_to_print;
5 12
6struct thread_info; 13struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
42 unsigned long *stack, unsigned long bp, 49 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data); 50 const struct stacktrace_ops *ops, void *data);
44 51
52#ifdef CONFIG_X86_32
53#define STACKSLOTS_PER_LINE 8
54#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
55#else
56#define STACKSLOTS_PER_LINE 4
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif
59
60extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl);
63
64extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl);
67
68extern unsigned int code_bytes;
69
70/* The form of the top of the frame on the stack */
71struct stack_frame {
72 struct stack_frame *next_frame;
73 unsigned long return_address;
74};
75
76struct stack_frame_ia32 {
77 u32 next_frame;
78 u32 return_address;
79};
80
81static inline unsigned long caller_frame_pointer(void)
82{
83 struct stack_frame *frame;
84
85 get_bp(frame);
86
87#ifdef CONFIG_FRAME_POINTER
88 frame = frame->next_frame;
89#endif
90
91 return (unsigned long)frame;
92}
93
45#endif /* _ASM_X86_STACKTRACE_H */ 94#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
220 struct perf_event *event); 220 struct perf_event *event);
221 struct event_constraint *event_constraints; 221 struct event_constraint *event_constraints;
222 void (*quirks)(void); 222 void (*quirks)(void);
223 int perfctr_second_write;
223 224
224 int (*cpu_prepare)(int cpu); 225 int (*cpu_prepare)(int cpu);
225 void (*cpu_starting)(int cpu); 226 void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
295 * count to the generic event atomically: 296 * count to the generic event atomically:
296 */ 297 */
297again: 298again:
298 prev_raw_count = atomic64_read(&hwc->prev_count); 299 prev_raw_count = local64_read(&hwc->prev_count);
299 rdmsrl(hwc->event_base + idx, new_raw_count); 300 rdmsrl(hwc->event_base + idx, new_raw_count);
300 301
301 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 302 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
302 new_raw_count) != prev_raw_count) 303 new_raw_count) != prev_raw_count)
303 goto again; 304 goto again;
304 305
@@ -313,8 +314,8 @@ again:
313 delta = (new_raw_count << shift) - (prev_raw_count << shift); 314 delta = (new_raw_count << shift) - (prev_raw_count << shift);
314 delta >>= shift; 315 delta >>= shift;
315 316
316 atomic64_add(delta, &event->count); 317 local64_add(delta, &event->count);
317 atomic64_sub(delta, &hwc->period_left); 318 local64_sub(delta, &hwc->period_left);
318 319
319 return new_raw_count; 320 return new_raw_count;
320} 321}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
438 if (!hwc->sample_period) { 439 if (!hwc->sample_period) {
439 hwc->sample_period = x86_pmu.max_period; 440 hwc->sample_period = x86_pmu.max_period;
440 hwc->last_period = hwc->sample_period; 441 hwc->last_period = hwc->sample_period;
441 atomic64_set(&hwc->period_left, hwc->sample_period); 442 local64_set(&hwc->period_left, hwc->sample_period);
442 } else { 443 } else {
443 /* 444 /*
444 * If we have a PMU initialized but no APIC 445 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
885x86_perf_event_set_period(struct perf_event *event) 886x86_perf_event_set_period(struct perf_event *event)
886{ 887{
887 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
888 s64 left = atomic64_read(&hwc->period_left); 889 s64 left = local64_read(&hwc->period_left);
889 s64 period = hwc->sample_period; 890 s64 period = hwc->sample_period;
890 int ret = 0, idx = hwc->idx; 891 int ret = 0, idx = hwc->idx;
891 892
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
897 */ 898 */
898 if (unlikely(left <= -period)) { 899 if (unlikely(left <= -period)) {
899 left = period; 900 left = period;
900 atomic64_set(&hwc->period_left, left); 901 local64_set(&hwc->period_left, left);
901 hwc->last_period = period; 902 hwc->last_period = period;
902 ret = 1; 903 ret = 1;
903 } 904 }
904 905
905 if (unlikely(left <= 0)) { 906 if (unlikely(left <= 0)) {
906 left += period; 907 left += period;
907 atomic64_set(&hwc->period_left, left); 908 local64_set(&hwc->period_left, left);
908 hwc->last_period = period; 909 hwc->last_period = period;
909 ret = 1; 910 ret = 1;
910 } 911 }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
923 * The hw event starts counting from this event offset, 924 * The hw event starts counting from this event offset,
924 * mark it to be able to extra future deltas: 925 * mark it to be able to extra future deltas:
925 */ 926 */
926 atomic64_set(&hwc->prev_count, (u64)-left); 927 local64_set(&hwc->prev_count, (u64)-left);
927 928
928 wrmsrl(hwc->event_base + idx, 929 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
930
931 /*
932 * Due to erratum on certan cpu we need
933 * a second write to be sure the register
934 * is updated properly
935 */
936 if (x86_pmu.perfctr_second_write) {
937 wrmsrl(hwc->event_base + idx,
929 (u64)(-left) & x86_pmu.cntval_mask); 938 (u64)(-left) & x86_pmu.cntval_mask);
939 }
930 940
931 perf_event_update_userpage(event); 941 perf_event_update_userpage(event);
932 942
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
969 * skip the schedulability test here, it will be peformed 979 * skip the schedulability test here, it will be peformed
970 * at commit time(->commit_txn) as a whole 980 * at commit time(->commit_txn) as a whole
971 */ 981 */
972 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 982 if (cpuc->group_flag & PERF_EVENT_TXN)
973 goto out; 983 goto out;
974 984
975 ret = x86_pmu.schedule_events(cpuc, n, assign); 985 ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
1096 * The events never got scheduled and ->cancel_txn will truncate 1106 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list. 1107 * the event_list.
1098 */ 1108 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1109 if (cpuc->group_flag & PERF_EVENT_TXN)
1100 return; 1110 return;
1101 1111
1102 x86_pmu_stop(event); 1112 x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1388{ 1398{
1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1399 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1390 1400
1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1401 cpuc->group_flag |= PERF_EVENT_TXN;
1392 cpuc->n_txn = 0; 1402 cpuc->n_txn = 0;
1393} 1403}
1394 1404
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1401{ 1411{
1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1412 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1403 1413
1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1414 cpuc->group_flag &= ~PERF_EVENT_TXN;
1405 /* 1415 /*
1406 * Truncate the collected events. 1416 * Truncate the collected events.
1407 */ 1417 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1435 */ 1445 */
1436 memcpy(cpuc->assign, assign, n*sizeof(int)); 1446 memcpy(cpuc->assign, assign, n*sizeof(int));
1437 1447
1438 /* 1448 cpuc->group_flag &= ~PERF_EVENT_TXN;
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443 1449
1444 return 0; 1450 return 0;
1445} 1451}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
1607 .walk_stack = print_context_stack_bp, 1613 .walk_stack = print_context_stack_bp,
1608}; 1614};
1609 1615
1610#include "../dumpstack.h"
1611
1612static void 1616static void
1613perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1617perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1614{ 1618{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1730 return entry; 1734 return entry;
1731} 1735}
1732 1736
1733void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1734{
1735 regs->ip = ip;
1736 /*
1737 * perf_arch_fetch_caller_regs adds another call, we need to increment
1738 * the skip level
1739 */
1740 regs->bp = rewind_frame_pointer(skip + 1);
1741 regs->cs = __KERNEL_CS;
1742 /*
1743 * We abuse bit 3 to pass exact information, see perf_misc_flags
1744 * and the comment with PERF_EFLAGS_EXACT.
1745 */
1746 regs->flags = 0;
1747}
1748
1749unsigned long perf_instruction_pointer(struct pt_regs *regs) 1737unsigned long perf_instruction_pointer(struct pt_regs *regs)
1750{ 1738{
1751 unsigned long ip; 1739 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..107711bf0ee8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 22};
23 23
24struct p4_cache_event_bind { 24struct p4_pebs_bind {
25 unsigned int metric_pebs; 25 unsigned int metric_pebs;
26 unsigned int metric_vert; 26 unsigned int metric_vert;
27}; 27};
28 28
29#define P4_GEN_CACHE_EVENT_BIND(name) \ 29/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
30 [P4_CACHE__##name] = { \ 30#define P4_GEN_PEBS_BIND(name, pebs, vert) \
31 .metric_pebs = P4_PEBS__##name, \ 31 [P4_PEBS_METRIC__##name] = { \
32 .metric_vert = P4_VERT__##name, \ 32 .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
33 .metric_vert = vert, \
33 } 34 }
34 35
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = { 36/*
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), 37 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), 38 *
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), 39 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), 40 * event configuration to find out which values are to be
41 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
42 * resgisters
43 */
44static struct p4_pebs_bind p4_pebs_bind_map[] = {
45 P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
46 P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
47 P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
48 P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
49 P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
50 P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
51 P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
52 P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
53 P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
40}; 54};
41 55
42/* 56/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
281 }, 295 },
282}; 296};
283 297
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ 298#define P4_GEN_CACHE_EVENT(event, bit, metric) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \ 299 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \ 300 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \ 301 p4_config_pack_cccr(metric | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) 302 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289 303
290static __initconst const u64 p4_hw_cache_event_ids 304static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
296 [ C(OP_READ) ] = { 310 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0, 311 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired), 313 P4_PEBS_METRIC__1stl_cache_load_miss_retired),
300 }, 314 },
301 }, 315 },
302 [ C(LL ) ] = { 316 [ C(LL ) ] = {
303 [ C(OP_READ) ] = { 317 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0, 318 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 319 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired), 320 P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
307 }, 321 },
308}, 322},
309 [ C(DTLB) ] = { 323 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = { 324 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0, 325 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 326 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired), 327 P4_PEBS_METRIC__dtlb_load_miss_retired),
314 }, 328 },
315 [ C(OP_WRITE) ] = { 329 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0, 330 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 331 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired), 332 P4_PEBS_METRIC__dtlb_store_miss_retired),
319 }, 333 },
320 }, 334 },
321 [ C(ITLB) ] = { 335 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = { 336 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, 337 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit), 338 P4_PEBS_METRIC__none),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, 339 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss), 340 P4_PEBS_METRIC__none),
327 }, 341 },
328 [ C(OP_WRITE) ] = { 342 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1, 343 [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
414 return config; 428 return config;
415} 429}
416 430
431static int p4_validate_raw_event(struct perf_event *event)
432{
433 unsigned int v;
434
435 /* user data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) {
438 pr_warning("P4 PMU: Unknown event code: %d\n", v);
439 return -EINVAL;
440 }
441
442 /*
443 * it may have some screwed PEBS bits
444 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL;
448 }
449 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL;
453 }
454
455 return 0;
456}
457
417static int p4_hw_config(struct perf_event *event) 458static int p4_hw_config(struct perf_event *event)
418{ 459{
419 int cpu = get_cpu(); 460 int cpu = get_cpu();
420 int rc = 0; 461 int rc = 0;
421 unsigned int evnt;
422 u32 escr, cccr; 462 u32 escr, cccr;
423 463
424 /* 464 /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
438 478
439 if (event->attr.type == PERF_TYPE_RAW) { 479 if (event->attr.type == PERF_TYPE_RAW) {
440 480
441 /* user data may have out-of-bound event index */ 481 rc = p4_validate_raw_event(event);
442 evnt = p4_config_unpack_event(event->attr.config); 482 if (rc)
443 if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
444 rc = -EINVAL;
445 goto out; 483 goto out;
446 }
447 484
448 /* 485 /*
449 * We don't control raw events so it's up to the caller 486 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
451 * on HT machine but allow HT-compatible specifics to be 488 * on HT machine but allow HT-compatible specifics to be
452 * passed on) 489 * passed on)
453 * 490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc)
493 *
454 * XXX: HT wide things should check perf_paranoid_cpu() && 494 * XXX: HT wide things should check perf_paranoid_cpu() &&
455 * CAP_SYS_ADMIN 495 * CAP_SYS_ADMIN
456 */ 496 */
457 event->hw.config |= event->attr.config & 497 event->hw.config |= event->attr.config &
458 (p4_config_pack_escr(P4_ESCR_MASK_HT) | 498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
459 p4_config_pack_cccr(P4_CCCR_MASK_HT)); 499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
460 } 500 }
461 501
462 rc = x86_setup_perfctr(event); 502 rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
482 return overflow; 522 return overflow;
483} 523}
484 524
525static void p4_pmu_disable_pebs(void)
526{
527 /*
528 * FIXME
529 *
530 * It's still allowed that two threads setup same cache
531 * events so we can't simply clear metrics until we knew
532 * noone is depending on us, so we need kind of counter
533 * for "ReplayEvent" users.
534 *
535 * What is more complex -- RAW events, if user (for some
536 * reason) will pass some cache event metric with improper
537 * event opcode -- it's fine from hardware point of view
538 * but completely nonsence from "meaning" of such action.
539 *
540 * So at moment let leave metrics turned on forever -- it's
541 * ok for now but need to be revisited!
542 *
543 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
544 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
545 */
546}
547
485static inline void p4_pmu_disable_event(struct perf_event *event) 548static inline void p4_pmu_disable_event(struct perf_event *event)
486{ 549{
487 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
507 continue; 570 continue;
508 p4_pmu_disable_event(event); 571 p4_pmu_disable_event(event);
509 } 572 }
573
574 p4_pmu_disable_pebs();
575}
576
577/* configuration must be valid */
578static void p4_pmu_enable_pebs(u64 config)
579{
580 struct p4_pebs_bind *bind;
581 unsigned int idx;
582
583 BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
584
585 idx = p4_config_unpack_metric(config);
586 if (idx == P4_PEBS_METRIC__none)
587 return;
588
589 bind = &p4_pebs_bind_map[idx];
590
591 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
592 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
510} 593}
511 594
512static void p4_pmu_enable_event(struct perf_event *event) 595static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
515 int thread = p4_ht_config_thread(hwc->config); 598 int thread = p4_ht_config_thread(hwc->config);
516 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); 599 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
517 unsigned int idx = p4_config_unpack_event(hwc->config); 600 unsigned int idx = p4_config_unpack_event(hwc->config);
518 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
519 struct p4_event_bind *bind; 601 struct p4_event_bind *bind;
520 struct p4_cache_event_bind *bind_cache;
521 u64 escr_addr, cccr; 602 u64 escr_addr, cccr;
522 603
523 bind = &p4_event_bind_map[idx]; 604 bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
537 cccr = p4_config_unpack_cccr(hwc->config); 618 cccr = p4_config_unpack_cccr(hwc->config);
538 619
539 /* 620 /*
540 * it could be Cache event so that we need to 621 * it could be Cache event so we need to write metrics
541 * set metrics into additional MSRs 622 * into additional MSRs
542 */ 623 */
543 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); 624 p4_pmu_enable_pebs(hwc->config);
544 if (idx_cache > P4_CACHE__NONE &&
545 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
546 bind_cache = &p4_cache_event_bind_map[idx_cache];
547 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
548 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
549 }
550 625
551 (void)checking_wrmsrl(escr_addr, escr_conf); 626 (void)checking_wrmsrl(escr_addr, escr_conf);
552 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 627 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
829 .max_period = (1ULL << 39) - 1, 904 .max_period = (1ULL << 39) - 1,
830 .hw_config = p4_hw_config, 905 .hw_config = p4_hw_config,
831 .schedule_events = p4_pmu_schedule_events, 906 .schedule_events = p4_pmu_schedule_events,
907 /*
908 * This handles erratum N15 in intel doc 249199-029,
909 * the counter may not be updated correctly on write
910 * so we need a second write operation to do the trick
911 * (the official workaround didn't work)
912 *
913 * the former idea is taken from OProfile code
914 */
915 .perfctr_second_write = 1,
832}; 916};
833 917
834static __init int p4_pmu_init(void) 918static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b7..6e8752c1bd52 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20
21#include "dumpstack.h"
22 21
23int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
24int panic_on_io_nmi; 23int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd44..000000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17#include <linux/uaccess.h>
18
19extern void
20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
21 unsigned long *stack, unsigned long bp, char *log_lvl);
22
23extern void
24show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *sp, unsigned long bp, char *log_lvl);
26
27extern unsigned int code_bytes;
28
29/* The form of the top of the frame on the stack */
30struct stack_frame {
31 struct stack_frame *next_frame;
32 unsigned long return_address;
33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d93..0f6376ffa2d9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20
21 19
22void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 21 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f3..57a21f11c791 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20 19
21#define N_EXCEPTION_STACKS_END \ 20#define N_EXCEPTION_STACKS_END \
22 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..a474ec37c32f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
208{ 208{
209 /* Len */ 209 /* Len */
210 switch (x86_len) { 210 switch (x86_len) {
211 case X86_BREAKPOINT_LEN_X:
212 *gen_len = sizeof(long);
213 break;
211 case X86_BREAKPOINT_LEN_1: 214 case X86_BREAKPOINT_LEN_1:
212 *gen_len = HW_BREAKPOINT_LEN_1; 215 *gen_len = HW_BREAKPOINT_LEN_1;
213 break; 216 break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
251 254
252 info->address = bp->attr.bp_addr; 255 info->address = bp->attr.bp_addr;
253 256
257 /* Type */
258 switch (bp->attr.bp_type) {
259 case HW_BREAKPOINT_W:
260 info->type = X86_BREAKPOINT_WRITE;
261 break;
262 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
263 info->type = X86_BREAKPOINT_RW;
264 break;
265 case HW_BREAKPOINT_X:
266 info->type = X86_BREAKPOINT_EXECUTE;
267 /*
268 * x86 inst breakpoints need to have a specific undefined len.
269 * But we still need to check userspace is not trying to setup
270 * an unsupported length, to get a range breakpoint for example.
271 */
272 if (bp->attr.bp_len == sizeof(long)) {
273 info->len = X86_BREAKPOINT_LEN_X;
274 return 0;
275 }
276 default:
277 return -EINVAL;
278 }
279
254 /* Len */ 280 /* Len */
255 switch (bp->attr.bp_len) { 281 switch (bp->attr.bp_len) {
256 case HW_BREAKPOINT_LEN_1: 282 case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
271 return -EINVAL; 297 return -EINVAL;
272 } 298 }
273 299
274 /* Type */
275 switch (bp->attr.bp_type) {
276 case HW_BREAKPOINT_W:
277 info->type = X86_BREAKPOINT_WRITE;
278 break;
279 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
280 info->type = X86_BREAKPOINT_RW;
281 break;
282 case HW_BREAKPOINT_X:
283 info->type = X86_BREAKPOINT_EXECUTE;
284 break;
285 default:
286 return -EINVAL;
287 }
288
289 return 0; 300 return 0;
290} 301}
291/* 302/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
305 ret = -EINVAL; 316 ret = -EINVAL;
306 317
307 switch (info->len) { 318 switch (info->len) {
319 case X86_BREAKPOINT_LEN_X:
320 align = sizeof(long) -1;
321 break;
308 case X86_BREAKPOINT_LEN_1: 322 case X86_BREAKPOINT_LEN_1:
309 align = 0; 323 align = 0;
310 break; 324 break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
466 480
467 perf_bp_event(bp, args->regs); 481 perf_bp_event(bp, args->regs);
468 482
483 /*
484 * Set up resume flag to avoid breakpoint recursion when
485 * returning back to origin.
486 */
487 if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
488 args->regs->flags |= X86_EFLAGS_RF;
489
469 rcu_read_unlock(); 490 rcu_read_unlock();
470 } 491 }
471 /* 492 /*
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 675879b65ce6..1bfb6cf4dd55 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
126} 126}
127 127
128/* 128/*
129 * Check for the REX prefix which can only exist on X86_64 129 * Skip the prefixes of the instruction.
130 * X86_32 always returns 0
131 */ 130 */
132static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) 131static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
133{ 132{
133 insn_attr_t attr;
134
135 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
136 while (inat_is_legacy_prefix(attr)) {
137 insn++;
138 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
139 }
134#ifdef CONFIG_X86_64 140#ifdef CONFIG_X86_64
135 if ((*insn & 0xf0) == 0x40) 141 if (inat_is_rex_prefix(attr))
136 return 1; 142 insn++;
137#endif 143#endif
138 return 0; 144 return insn;
139} 145}
140 146
141/* 147/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
272 */ 278 */
273static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 279static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
274{ 280{
281 /* Skip prefixes */
282 insn = skip_prefixes(insn);
283
275 switch (*insn) { 284 switch (*insn) {
276 case 0xfa: /* cli */ 285 case 0xfa: /* cli */
277 case 0xfb: /* sti */ 286 case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
280 return 1; 289 return 1;
281 } 290 }
282 291
283 /*
284 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
285 * at the next byte instead.. but of course not recurse infinitely
286 */
287 if (is_REX_prefix(insn))
288 return is_IF_modifier(++insn);
289
290 return 0; 292 return 0;
291} 293}
292 294
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
803 unsigned long orig_ip = (unsigned long)p->addr; 805 unsigned long orig_ip = (unsigned long)p->addr;
804 kprobe_opcode_t *insn = p->ainsn.insn; 806 kprobe_opcode_t *insn = p->ainsn.insn;
805 807
806 /*skip the REX prefix*/ 808 /* Skip prefixes */
807 if (is_REX_prefix(insn)) 809 insn = skip_prefixes(insn);
808 insn++;
809 810
810 regs->flags &= ~X86_EFLAGS_TF; 811 regs->flags &= ~X86_EFLAGS_TF;
811 switch (*insn) { 812 switch (*insn) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af47..96586c3cbbbf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/debugreg.h> 58#include <asm/debugreg.h>
59 59
60#include <trace/events/power.h>
61
60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
61 63
62/* 64/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
111 stop_critical_timings(); 113 stop_critical_timings();
112 pm_idle(); 114 pm_idle();
113 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id());
114 } 118 }
115 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
116 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1f..3d9ea531ddd1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/debugreg.h> 52#include <asm/debugreg.h>
53 53
54#include <trace/events/power.h>
55
54asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
55 57
56DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
138 stop_critical_timings(); 140 stop_critical_timings();
139 pm_idle(); 141 pm_idle();
140 start_critical_timings(); 142 start_critical_timings();
143
144 trace_power_end(smp_processor_id());
145
141 /* In many cases the interrupt that ended idle 146 /* In many cases the interrupt that ended idle
142 has already called exit_idle. But some idle 147 has already called exit_idle. But some idle
143 loops can be woken up without interrupt. */ 148 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6c..b53c525368a7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
23 return 0; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void
27__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
27{ 28{
28 struct stack_trace *trace = data; 29 struct stack_trace *trace = data;
30#ifdef CONFIG_FRAME_POINTER
29 if (!reliable) 31 if (!reliable)
30 return; 32 return;
33#endif
34 if (nosched && in_sched_functions(addr))
35 return;
31 if (trace->skip > 0) { 36 if (trace->skip > 0) {
32 trace->skip--; 37 trace->skip--;
33 return; 38 return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
36 trace->entries[trace->nr_entries++] = addr; 41 trace->entries[trace->nr_entries++] = addr;
37} 42}
38 43
44static void save_stack_address(void *data, unsigned long addr, int reliable)
45{
46 return __save_stack_address(data, addr, reliable, false);
47}
48
39static void 49static void
40save_stack_address_nosched(void *data, unsigned long addr, int reliable) 50save_stack_address_nosched(void *data, unsigned long addr, int reliable)
41{ 51{
42 struct stack_trace *trace = (struct stack_trace *)data; 52 return __save_stack_address(data, addr, reliable, true);
43 if (!reliable)
44 return;
45 if (in_sched_functions(addr))
46 return;
47 if (trace->skip > 0) {
48 trace->skip--;
49 return;
50 }
51 if (trace->nr_entries < trace->max_entries)
52 trace->entries[trace->nr_entries++] = addr;
53} 53}
54 54
55static const struct stacktrace_ops save_stack_ops = { 55static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
96 96
97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ 97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
98 98
99struct stack_frame { 99struct stack_frame_user {
100 const void __user *next_fp; 100 const void __user *next_fp;
101 unsigned long ret_addr; 101 unsigned long ret_addr;
102}; 102};
103 103
104static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 104static int
105copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
105{ 106{
106 int ret; 107 int ret;
107 108
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
126 trace->entries[trace->nr_entries++] = regs->ip; 127 trace->entries[trace->nr_entries++] = regs->ip;
127 128
128 while (trace->nr_entries < trace->max_entries) { 129 while (trace->nr_entries < trace->max_entries) {
129 struct stack_frame frame; 130 struct stack_frame_user frame;
130 131
131 frame.next_fp = NULL; 132 frame.next_fp = NULL;
132 frame.ret_addr = 0; 133 frame.ret_addr = 0;
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>